In [None]:

import geopandas as gpd
import momepy
import networkx as nx
import graph_tool.all as gt
from scripts.nx2gt import nx2gt
import pandas as pd
import matplotlib
import itertools
from shapely.geometry import LineString, point
from shapely.wkt import loads
import geopandas as gpd
from tqdm import tqdm
import numpy as np
from dask.distributed import Client, LocalCluster
from dask import delayed
import dask.dataframe as dd
import dask
import math

In [None]:
# Name of the study area; interpolated into the ./out/{place}/ paths that the
# notebook reads its input from and writes its results to.
place = "singapore"

In [None]:
# Start a local Dask cluster and connect a client to it.
# NOTE(review): Dask documents memory_limit as a per-worker limit, so this
# requests 8 x 100GB in total -- confirm that matches the host machine.
daskCluster = LocalCluster(
    n_workers=8,
    threads_per_worker=2,
    memory_limit='100GB',
)
client = Client(daskCluster)

# Show the client summary as the cell output.
client

In [None]:
# Read the street segments for the study area, then explode multi-part
# geometries into one row per part.
streets = gpd.read_parquet(f"./out/{place}/streets.pq")
streets = streets.explode()

# Attach an identifier column for the edges, generated by momepy.
streets['edgeID'] = momepy.unique_id(streets)

In [None]:
# Build the primal networkx graph from the street GeoDataFrame; segment
# lengths are stored on the edges under the "mm_len" attribute.
primal = momepy.gdf_to_nx(
    streets,
    approach='primal',
    length="mm_len",
)

In [None]:
# Lookup from integer vertex ID to the corresponding primal graph node.
# Starts empty; the node-numbering loop in a later cell fills it.
vertID_dict = {}

In [None]:
# Give every primal node a sequential integer "vertID" attribute and record
# the lookup vertID -> node in vertID_dict.
# enumerate() replaces the hand-rolled itertools counter, and the loop
# variable is no longer named `id`, which used to shadow the built-in id()
# for the remainder of the kernel session.
for vert_id, node in enumerate(primal.nodes()):
    primal.nodes[node]['vertID'] = vert_id
    vertID_dict[vert_id] = node

In [None]:
# Reverse lookup: primal graph node -> integer vertex ID.
inverted_vertID_dict = dict(zip(vertID_dict.values(), vertID_dict.keys()))

In [None]:
# Scatter the primal graph into the cluster's distributed memory; the
# returned handle is what every task receives.
primal_scattered = client.scatter(primal)


@delayed
def compute_ego_graph(n, primal_scattered, radius):
    """Build the ego graph around node ``n`` and convert it to graph-tool.

    Parameters
    ----------
    n
        Centre node of the ego graph (a node of the primal graph).
    primal_scattered
        The primal networkx graph (passed in as the scattered handle).
    radius
        Maximum distance from ``n``, measured along the "mm_len" edge
        attribute, for a node to be included.

    Returns
    -------
    tuple
        ``(n, graph)`` where ``graph`` is the nx2gt conversion of the
        ego graph.
    """
    neighbourhood = nx.ego_graph(primal_scattered, n, radius, distance="mm_len")
    return (n, nx2gt(neighbourhood))


# One delayed task per primal node, each with a radius of 400 ("mm_len" units).
tasks = [compute_ego_graph(n, primal_scattered, 400) for n in primal.nodes()]

# Run all tasks on the cluster; yields one (node, ego graph) tuple per task.
results = dask.compute(*tasks)

In [None]:
# Index the computed ego graphs by integer vertex ID. Each entry stores:
#   "graph"  - the graph-tool ego graph,
#   "weight" - its "mm_len" edge property map, or None if it has no edges,
#   "node"   - the vertex of the ego graph that corresponds to the centre node.
ego_graphs = {}

for n, ego_graph in results:
    vert_id = inverted_vertID_dict[n]

    # Use the "mm_len" weights whenever the ego graph has at least one edge.
    # The previous `> 1` test left single-edge ego graphs unweighted, which
    # mixed weighted and unweighted results in the same downstream column.
    # num_edges() also avoids materialising the full edge list just to count.
    if ego_graph.num_edges() > 0:
        weight = ego_graph.edge_properties["mm_len"]
    else:
        weight = None

    # Locate the centre vertex: the first position whose vertID equals the
    # centre's ID (raises IndexError if it is missing, as before).
    vert_ids = np.asarray(ego_graph.vp.vertID.get_array())
    centre_pos = np.flatnonzero(vert_ids == vert_id)[0]

    ego_graphs[vert_id] = {
        "graph": ego_graph,
        "weight": weight,
        "node": ego_graph.get_vertices()[centre_pos],
    }

In [None]:
# Convert the full primal networkx graph to a graph-tool graph
gtG = nx2gt(primal)

# List the property maps that the conversion attached to the graph
gtG.list_properties()

In [None]:
# Edge lengths are the weights for the global centrality measures.
mm_len = gtG.edge_properties["mm_len"]

# Restrict the analysis to the largest connected component via a filtered view.
g = gt.GraphView(gtG, vfilt=gt.label_largest_component(gtG))

# Weighted betweenness: `vp` is the per-vertex map, `ep` the per-edge map.
vp, ep = gt.betweenness(g, weight=mm_len)

In [None]:
# Pull the per-edge values out of the graph-tool property maps.
# NOTE(review): these arrays are read from property maps of a filtered
# GraphView; confirm whether get_array() also covers edges outside the
# largest component.
edgeID = g.ep.edgeID.get_array()
betweeness_centrality = ep.get_array()
geometry = g.ep.geometry.get_array()

edge_metrics = pd.DataFrame({
    'edgeID': edgeID,
    'betweeness_centrality_edge': betweeness_centrality,
    'geometry': geometry,
})

# Join the metrics onto the street segments by edgeID, keeping the geometry
# that comes from `streets` and discarding the one read from the graph.
edges_frame = (
    pd.merge(edge_metrics, streets, on='edgeID')
    .drop(columns=["geometry_x"])
    .rename(columns={"geometry_y": "geometry"})
)
edges_frame = gpd.GeoDataFrame(edges_frame, geometry=edges_frame['geometry'], crs=streets.crs)
edges_frame = edges_frame.sort_values(by=['edgeID'], ascending=True).reset_index(drop=True)
edges_frame

In [None]:
# Per-vertex IDs and betweenness values from the graph-tool property maps.
vertex_id = g.vp.vertID.get_array()
vp_array = vp.get_array()

vertices_frame = pd.DataFrame({'vertID': vertex_id, 'betweeness_centrality_vertex': vp_array})

# One row per vertex ID holding the two components of its node tuple.
# NOTE(review): component 0 is labelled "y" and component 1 "x"; a later cell
# rebuilds the geometry with the coordinates exchanged -- confirm the axis
# order of the graph's node tuples before changing either place.
vertID_df = pd.DataFrame.from_dict(vertID_dict, orient='index').rename(columns={0: "y", 1: "x"})

# Join coordinates (by index) with the centrality values (by vertID).
vertices_frame = vertID_df.merge(vertices_frame, left_index=True, right_on='vertID')
vertices_frame["geometry"] = [
    point.Point(x_val, y_val)
    for x_val, y_val in zip(vertices_frame["x"], vertices_frame["y"])
]
vertices_frame = vertices_frame.drop(columns=["x", "y"])


In [None]:
# Total degree (in + out) of every vertex in the filtered view, keyed by the
# vertex index, as a one-column DataFrame.
node_degree = pd.DataFrame.from_dict(
    {g.vertex_index[v]: v.in_degree() + v.out_degree() for v in g.vertices()},
    orient='index',
).rename(columns={0: "node_degree"})

# Attach the degree to each vertex row, matching vertID to the vertex index.
vertices_frame = vertices_frame.merge(node_degree, left_on='vertID', right_index=True)

In [None]:
# Closeness of each centre vertex within its own radius-400 ego graph,
# keyed by vertex ID.
closeness = {
    k: gt.closeness(ego["graph"], weight=ego["weight"], source=ego["node"])
    for k, ego in tqdm(ego_graphs.items())
}

# Cast to float and replace missing values with 0 using Series methods.
# DataFrame.applymap is deprecated in recent pandas, and the former
# per-element `.astype` call only works when every value is a NumPy scalar.
closeness_df = pd.Series(closeness, name='closeness').astype(float).fillna(0).to_frame()

# Attach to the vertex table, matching vertID to the ego-graph key.
vertices_frame = vertices_frame.merge(closeness_df, right_index=True, left_on='vertID')

In [None]:
# Length-weighted closeness over the largest-component view, one value per
# position of the property array.
global_closeness_centrality = pd.DataFrame(
    {'global_closeness_centrality': gt.closeness(g, weight=mm_len).get_array()}
)

# Attach to the vertex table, matching vertID to the array position.
vertices_frame = vertices_frame.merge(
    global_closeness_centrality, left_on='vertID', right_index=True
)

In [None]:
# Global clustering coefficient of each vertex's ego graph (first element of
# graph-tool's result), keyed by vertex ID.
clustering_by_vertex = {
    k: float(gt.global_clustering(ego["graph"])[0])
    for k, ego in tqdm(ego_graphs.items())
}
global_clustering_400m = pd.DataFrame.from_dict(
    clustering_by_vertex, orient='index'
).rename(columns={0: "global_clustering_400m"})

# Attach to the vertex table, matching vertID to the ego-graph key.
vertices_frame = vertices_frame.merge(global_clustering_400m, left_on='vertID', right_index=True)

In [None]:
# Length-weighted eigenvector centrality on the largest-component view.
# gt.eigenvector returns the leading eigenvalue and a per-vertex property map.
eigenvalue, eigenvector_vp = gt.eigenvector(g, weight=mm_len, max_iter=100000)

# Build the column from the centrality values themselves. The previous code
# filled it with g.ep.edgeID.get_array(), i.e. edge IDs, so the "eigenvector"
# column never contained the computed centrality.
eigenvector = pd.DataFrame({'eigenvector': eigenvector_vp.get_array()})

# Attach to the vertex table, matching vertID to the array position.
vertices_frame = vertices_frame.merge(eigenvector, right_index=True, left_on='vertID')


In [None]:
# Number of other vertices in each ego graph (vertex count minus the centre),
# keyed by vertex ID.
node_centrality_400m = pd.DataFrame.from_dict(
    {k: len(ego["graph"].get_vertices()) - 1 for k, ego in ego_graphs.items()},
    orient='index',
).rename(columns={0: "node_centrality_400m"})

# Attach to the vertex table, matching vertID to the ego-graph key.
vertices_frame = vertices_frame.merge(node_centrality_400m, left_on='vertID', right_index=True)

In [None]:
# Display the edge table as the cell output.
edges_frame

In [None]:
# Promote the vertex table to a GeoDataFrame using its "geometry" column and
# the CRS of the street data.
vertices_frame = gpd.GeoDataFrame(vertices_frame, geometry=vertices_frame['geometry'], crs=streets.crs)

In [None]:
# Rebuild every point with its two coordinates exchanged: the current y is
# passed as the new x and the current x as the new y.
# NOTE(review): this appears to undo the "y"/"x" column labelling used when
# the points were first created from the node tuples -- confirm the axis
# order before changing either place; re-running this cell alone swaps again.
vertices_frame['geometry'] = gpd.points_from_xy(vertices_frame.geometry.y, vertices_frame.geometry.x)

In [None]:
# Re-wrap both tables as GeoDataFrames with the CRS of the street data, after
# the vertex geometry column was replaced in the previous cell.
vertices_frame = gpd.GeoDataFrame(vertices_frame, geometry=vertices_frame['geometry'], crs=streets.crs)
edges_frame = gpd.GeoDataFrame(edges_frame, geometry=edges_frame['geometry'], crs=streets.crs)

In [None]:
# Persist the vertex and edge tables as Parquet files in the study area's
# output directory.
vertices_frame.to_parquet(f"./out/{place}/vertices_frame.pq")
edges_frame.to_parquet(f"./out/{place}/edges_frame.pq")

In [None]:
# tasks = []
# for n in primal.nodes():
#     task = compute_ego_graph(n, primal_scattered, 2000)
#     tasks.append(task)
    
# results = dask.compute(*tasks)

# ego_graphs_2000 = {}

# for n, ego_graph in results:
#     ego_graphs[inverted_vertID_dict[n]] = {}
#     # 
#     ego_graphs[inverted_vertID_dict[n]]["graph"] = ego_graph

#     if len(list(ego_graph.edges())) > 1:
#         ego_graphs[inverted_vertID_dict[n]]["weight"] = ego_graph.edge_properties["mm_len"]
#     else:
#         ego_graphs[inverted_vertID_dict[n]]["weight"] = None
        
#     ego_graphs[inverted_vertID_dict[n]]["node"] = ego_graph.get_vertices()[[i for i,v in enumerate(ego_graph.vp.vertID.get_array()) if v == inverted_vertID_dict[n]][0]]

In [None]:
# import pickle
# # pickle the object
# with open('ego_graphs_2000.pickle', 'wb') as f:
#     pickle.dump(ego_graphs_2000, f)

In [None]:
# straightness_df = {k:0 for k in tqdm(ego_graphs_2000)}

# def euclidean_dist(x1, y1, x2, y2):
#     return math.sqrt((x1 - x2)**2 + (y1 - y2)**2)

# def bravo(target, k, vertID_dict, network_dist):
#     euclidean_distance = euclidean_dist(vertID_dict[k][0], vertID_dict[k][1], vertID_dict[target][0], vertID_dict[target][1])
#     return euclidean_distance / network_dist

# for k in tqdm(ego_graphs_2000):
#     ego_graph = ego_graphs_2000[k]["graph"]
#     straightness = 0
#     sp = gt.shortest_distance(ego_graph, k, target=gt.shortest_distance weights=ego_graph.edge_properties["mm_len"])

#     if len(sp.get_array()) > 0 and len(G) > 1:
#         for target, value in enumerate(sp):
#             if k != target:
#                 network_dist = sp_scattered[target]
#                 straightness += bravo(target, k, vertID_dict, network_dist)
#         straightness_df[k] = straightness * (1.0 / (len(vertID_dict.keys()) - 1.0))
#     else:
#         straightness_df[k] = 0

In [None]:
# Disabled: this Dask-based straightness-centrality computation was too slow.

# def euclidean_dist(x1, y1, x2, y2):
#     return math.sqrt((x1 - x2)**2 + (y1 - y2)**2)

# def bravo(target, n, vertID_dict_scattered, network_dist):
#     euclidean_distance = euclidean_dist(vertID_dict_scattered[n][0], vertID_dict_scattered[n][1], vertID_dict_scattered[target][0], vertID_dict_scattered[target][1])
#     return euclidean_distance / network_dist

# @dask.delayed
# def alpha(G, n, vertID_dict_scattered):
#     straightness = 0
#     sp = gt.shortest_distance(G, n, weights=G.edge_properties["mm_len"])
#     sp_scattered = sp

#     if len(sp.get_array()) > 0 and len(G) > 1:
#         for target, value in enumerate(sp):
#             if n != target:
#                 network_dist = sp_scattered[target]
#                 straightness += bravo(target, n, vertID_dict_scattered, network_dist)
#         straightness_df = straightness * (1.0 / (len(vertID_dict_scattered.keys()) - 1.0))
#     else:
#         straightness_df = 0
    
#     return n, straightness_df

# def split_list(lst, chunk_size):
#     return [lst[i:i+chunk_size] for i in range(0, len(lst), chunk_size)]\
        
# def straightness_centrality(G, vertID_dict):
    
#     # chunked_list = split_list(list(G.iter_vertices()), 32)
#     # G_scattered = client.scatter(G)
#     # vertID_dict_scattered = client.scatter(vertID_dict)
#     # result=[]
    
#     # for chunk in chunked_list:   
#     #     delayed_objs = [alpha(G_scattered, n, vertID_dict_scattered) for n in chunk]
#     #     new_results = client.compute(delayed_objs)
#         # result.append(new_results)
    
    
#     chunked_list = split_list(list(G.iter_vertices()), 32)
#     G_scattered = client.scatter(G)
#     vertID_dict_scattered = client.scatter(vertID_dict)
#     results=[]

#     for chunk in chunked_list:   
#         delayed_objs = [alpha(G_scattered, n, vertID_dict_scattered) for n in chunk]
#         new_results = client.compute(delayed_objs)
#         gathered_results = client.gather(new_results)
#         results.append(gathered_results)
        
#     return results

# straightness_df = straightness_centrality(gtG, vertID_dict)

In [None]:
# Build the dual networkx graph from the street GeoDataFrame.
# NOTE(review): every later use of `dual` in this notebook is commented out,
# so this graph is currently built but not consumed -- confirm it is needed.
dual = momepy.gdf_to_nx(streets, approach='dual')

In [None]:
# vertID_dict = {}

# counter = itertools.count()

# for node in dual.nodes():
#     id = next(counter)
#     dual.nodes[node]['vertID'] = id
#     vertID_dict[id] = node
    
# inverted_vertID_dict = {value: key for key, value in vertID_dict.items()}

# dual_scattered = client.scatter(dual)

# @delayed
# def compute_ego_graph(n, dual_scattered, radius):
#     graph = nx2gt(nx.ego_graph(dual_scattered, n, radius, distance="angle"))
#     return (n, graph)

# # tasks = []
# # for n in dual.nodes():
# #     task = compute_ego_graph(n, dual_scattered, 400)
# #     tasks.append(task)
    
# # results = dask.compute(*tasks)

# # define a function to update vertID_dict every 5000 entries

# chunk_size = 1000

# chunks = [list(dual.nodes())[i:i+chunk_size] for i in range(0, len(dual.nodes()), chunk_size)]

# len_sublist = len(chunks)

# results = []

# for i, sublist in enumerate(chunks):

#     delay_objs =[]
#     new_results = []
    
#     delay_objs = [compute_ego_graph(n, dual_scattered, 400) for n in sublist]
#     new_results = dask.compute(delay_objs)
#     {results.append(tup) for tup in new_results}

# ego_graphs = {}

# for n, ego_graph in results:
#     ego_graphs[inverted_vertID_dict[n]] = {}
#     # 
#     ego_graphs[inverted_vertID_dict[n]]["graph"] = ego_graph

#     if len(list(ego_graph.edges())) > 1:
#         ego_graphs[inverted_vertID_dict[n]]["weight"] = ego_graph.edge_properties["angle"]
#     else:
#         ego_graphs[inverted_vertID_dict[n]]["weight"] = None
        
#     ego_graphs[inverted_vertID_dict[n]]["node"] = ego_graph.get_vertices()[[i for i,v in enumerate(ego_graph.vp.vertID.get_array()) if v == inverted_vertID_dict[n]][0]]
    
# # Convert it to a graph-tool graph
# gtG = nx2gt(dual)

# gtG.list_properties()

In [None]:
# import pickle

# # pickle gtG as dual_graph
# def pickle_graph(graph, filename):
#     with open(filename, 'wb') as f:
#         pickle.dump(graph, f)
        
# pickle_graph(gtG, f"./out/{place}/dual_graph.pkl")

In [None]:
# closeness = {k:gt.closeness(ego_graphs[k]["graph"], weight=ego_graphs[k]["weight"], source=ego_graphs[k]["node"]) for k in tqdm(ego_graphs)}
# closeness_df = pd.DataFrame({'closeness': closeness})
# closeness_df = closeness_df.applymap(lambda x: x.astype(float)).fillna(0)

# angle = gtG.edge_properties["angle"]

# global_closeness_centrality = gt.closeness(g, weight = angle).get_array()

# vp, ep = gt.betweenness(g, weight = angle)

# dual_vertices_frame = pd.DataFrame({'vertID': vertex_id, 'angcloseness400': closeness_df, 'closeness_global_ang': global_closeness_centrality})


In [None]:
# dual_vertices_frame.to_parquet(f"./out/{place}/dual_vertices_frame.parquet")

In [None]:
# Tear down Dask in dependency order: disconnect the client first, then stop
# the cluster it was connected to. The previous order closed the cluster and
# only afterwards asked the (now orphaned) client to shut the scheduler down.
client.close()
daskCluster.close()
