In [2]:
import sys
import pandas
sys.path.append('../')

from src.graph_utils import partition_graph_by_lonlat
import networkx as nx
from jurbey.jurbey import JURBEY

# Read the serialized graph fully into memory, then deserialize it once the
# file handle has been released.
with open("../data/berlin.jurbey", 'rb') as graph_file:
    serialized_graph = graph_file.read()
g = JURBEY.load(serialized_graph)
print(g.number_of_nodes())

# Everything below works on the output of partition_graph_by_lonlat, not on
# the full graph (presumably a lon/lat sub-region — confirm in src.graph_utils).
g_partition = partition_graph_by_lonlat(g)


290100


**Convert to edge-based graph**

In [18]:
import networkx as nx
# Build the line graph of a plain directed copy of the partition: each node of
# L is an edge (u, v) of g_partition.
directed_partition = nx.DiGraph(g_partition)
L = nx.line_graph(directed_partition)

In [50]:
# Materialize the line-graph nodes; each one is indexed below as (u, v).
nodes = list(L.nodes())
nodes[0]  # bare expression: not the last line of the cell, so it is not displayed
# Look up the arc payload stored on the matching edge of g_partition.
sample_edge = nodes[10]
g_partition[sample_edge[0]][sample_edge[1]]['data']

Arc(arcType=<ArcType.LANE_STRAIGHT: 'LANE_STRAIGHT'>, roadClass=<RoadClass.MajorRoad: 2>, roadAccessibility=<RoadAccessibility.NoRestriction: 1>, metadata={'bicycle': 'no', 'highway': 'primary', 'lanes': '4', 'lit': 'yes', 'maxspeed': '50', 'name': 'Bismarckstraße', 'oneway': 'yes', 'postal_code': '10625', 'ref': 'B 2;B 5', 'surface': 'asphalt', 'turn:lanes': 'through|through|through;right|right'}, signs=[], vehicleAccessibility=[], geometry=[GeoCoordinates(lon=13.3207077, lat=52.5123944, alt=nan), GeoCoordinates(lon=13.3207877, lat=52.5123711, alt=nan)])

In [62]:
def arc_features(arc):
    """Collect the raw (not yet encoded) feature values of one road arc.

    `arc` is indexed as ``(u, v)`` and resolved against the global
    `g_partition`. Returns a four-item list: the edge's ``'speed'`` cast to
    int, the ``'lanes'`` tag (``'1'`` when the tag is absent), the
    ``'highway'`` tag, and the name of the arc's road class.

    Raises KeyError if the edge has no ``'speed'``/``'data'`` entry or its
    metadata has no ``'highway'`` tag.
    """
    edge_attrs = g_partition[arc[0]][arc[1]]
    speed = int(edge_attrs['speed'])
    arc_data = edge_attrs['data']
    tags = arc_data.metadata
    lanes = tags.get('lanes', '1')
    highway = tags['highway']
    road_class = arc_data.roadClass.name
    return [speed, lanes, highway, road_class]
from sklearn.preprocessing import OneHotEncoder
# Shared one-hot encoder used by construct_features. With
# handle_unknown='ignore', a category not seen during fit is encoded as all
# zeros at transform time instead of raising.
enc = OneHotEncoder(handle_unknown='ignore')
def construct_features():
    """Fit the global encoder on every line-graph node and return the encoding.

    Gathers `arc_features` for each node of `L` (in `L.nodes` order) and
    passes the resulting rows to `enc.fit_transform`, so every call re-fits
    `enc`.
    """
    feature_rows = [arc_features(edge) for edge in L.nodes]
    return enc.fit_transform(feature_rows)
    
# Encoded feature matrix: one row per node of L, in L.nodes order.
X = construct_features()
    

In [63]:
# Categories the encoder learned for each of the four arc_features columns.
enc.categories_

[array([5, 10, 15, 20, 25, 30, 50], dtype=object),
 array(['1', '2', '3', '4', '5'], dtype=object),
 array(['access_ramp', 'corridor', 'living_street', 'platform', 'primary',
        'residential', 'secondary', 'secondary_link', 'service',
        'tertiary', 'tertiary_link', 'unclassified'], dtype=object),
 array(['DirtRoad', 'LocalRoad', 'MajorRoad'], dtype=object)]

In [64]:
# Check which container type fit_transform returned.
type(X)

scipy.sparse.csr.csr_matrix

In [19]:
# Adjacency matrix of the line graph L as a SciPy sparse matrix in COO format.
# NOTE(review): to_scipy_sparse_matrix was removed in networkx 3.0 in favour of
# to_scipy_sparse_array — confirm which networkx version this notebook pins.
adj = nx.to_scipy_sparse_matrix(L, format="coo")
import scipy.sparse as sp
import numpy as np
import torch

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor.

    Parameters
    ----------
    sparse_mx : scipy sparse matrix
        Any format offering ``.tocoo()``; values are cast to float32.

    Returns
    -------
    torch.Tensor
        Sparse COO tensor (float32 values, int64 indices) with the same shape
        and non-zero entries as `sparse_mx`.
    """
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    # Row and column coordinates stacked into the (2, nnz) layout torch expects.
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    # torch.sparse.FloatTensor(indices, values, shape) is the deprecated legacy
    # constructor; sparse_coo_tensor is its supported replacement and yields
    # the same float32 COO tensor.
    return torch.sparse_coo_tensor(indices, values, shape)
                                    
def normalize(mx):
    """Row-normalize sparse matrix.

    Every row is divided by its sum; rows whose sum is zero are left as all
    zeros.

    Parameters
    ----------
    mx : scipy sparse matrix
        Matrix to normalize. Integer and boolean matrices are accepted; their
        row sums are promoted to float64 before inversion.

    Returns
    -------
    scipy sparse matrix
        ``diag(1 / rowsum) @ mx``.
    """
    rowsum = np.asarray(mx.sum(1)).flatten()
    # np.power refuses negative exponents on integer arrays, so promote
    # non-float sums; float inputs keep their own precision.
    if not np.issubdtype(rowsum.dtype, np.inexact):
        rowsum = rowsum.astype(np.float64)
    # Zero rows give 1/0 = inf, which is reset to 0 just below; silence the
    # expected divide-by-zero warning instead of leaking it to the caller.
    with np.errstate(divide='ignore'):
        r_inv = np.power(rowsum, -1)
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    return r_mat_inv.dot(mx)

# Symmetrize the adjacency: wherever the transposed entry is larger than the
# original one, swap the original entry for the transposed entry.
transpose_is_larger = adj.T > adj
adj = adj + adj.T.multiply(transpose_is_larger) - adj.multiply(transpose_is_larger)

# Add self-loops, row-normalize, then convert to a torch sparse tensor.
adj_with_self_loops = adj + sp.eye(adj.shape[0])
adj = sparse_mx_to_torch_sparse_tensor(normalize(adj_with_self_loops))
                                    
                                    

In [20]:
# Sanity check: the matrix is square, with one row/column per node of L.
adj.shape

torch.Size([6163, 6163])

In [4]:
# Our speed data uses segment ids, but the model uses sequential indexes,
# based on `.nodes()`.
import math

df = pandas.read_csv("../data/timeseries_speed_april_first_week.csv")

# Start by mapping every id that occurs in the speed data to NaN, so ids
# missing from the graph still have an explicit entry (a defaultdict won't do
# what you expect in Pandas).
id_to_idx = dict.fromkeys(df["from_node"].unique(), math.nan)
id_to_idx.update(dict.fromkeys(df["to_node"].unique(), math.nan))

# Ids that are nodes of the partition get their position in `.nodes()`.
id_to_idx.update(
    (node_id, position) for position, node_id in enumerate(g_partition.nodes())
)

# Transform ids to indices.
df["from_node_idx"] = df.replace({"from_node": id_to_idx})["from_node"]
df["to_node_idx"] = df.replace({"to_node": id_to_idx})["to_node"]

df.head()

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,...,138,139,140,141,142,143,from_node,to_node,from_node_idx,to_node_idx
0,result.average,,,,10.854457,,5.680978,7.036838,1.499997,10.088634,...,,,,,10.850843,10.036408,628154368,1023689595,0,350
1,result.average,,,,10.854457,,5.680978,7.036838,1.499997,10.088634,...,,,,,10.850843,10.036408,628154368,1023689595,0,350
2,result.average,,,,,,10.48621,,,10.937218,...,,,,,,,527147009,27537239,1,1608
3,result.average,,,,,,,,,,...,,,,,,,527147009,26908815,1,2630
4,result.average,,,,10.468811,10.636621,10.288534,10.617513,1.758539,3.404401,...,10.586166,,10.669476,4.313417,10.392901,10.51831,628154370,3804638178,2,1197


In [5]:
# Put one road segment per column so the null ratio is taken per segment.
per_segment = df.T
print(f"original number of road segment: {per_segment.shape[1]}")

# Keep only the segments with fewer than half of their entries missing.
null_ratio = per_segment.isnull().mean()
per_segment = per_segment.loc[:, null_ratio < 0.5]
print(f"original number of road segment after filtering: {per_segment.shape[1]}")

# Back to one segment per row; drop the columns that are no longer needed
# (the *_idx columns added earlier stay).
df = per_segment.T
unwanted = ['Unnamed: 0', 'from_node', 'to_node']
df = df.loc[:, ~df.columns.isin(unwanted)]
print(df[0:10])

# NOTE: the cell leaves `df` transposed (one segment per column), and it
# rebinds `df`, so running it a second time changes the result.
df = df.T

original number of road segment: 6162
original number of road segment after filtering: 229
           0        1    2        3        4        5          6         7  \
4        NaN      NaN  NaN  10.4688  10.6366  10.2885    10.6175   1.75854   
7    10.2788      NaN  NaN      NaN  10.8314      NaN    3.14074   2.07388   
16   10.8814      NaN  NaN      NaN  10.9489  10.1905    8.48206   10.6813   
124  10.2644      NaN  NaN  10.5666      NaN  7.37511   0.915255   10.1382   
157      NaN      NaN  NaN      NaN  10.8314      NaN    7.53377   5.43425   
204      NaN  10.0555  NaN      NaN      NaN      NaN  0.0954053    10.709   
351      NaN      NaN  NaN  10.4688  10.4049  7.20193    7.68655   7.31936   
352      NaN      NaN  NaN   10.471  10.3797  0.40066   0.149782  0.486138   
379  10.3584      NaN  NaN      NaN  10.6964  6.91769    5.38908   10.8364   
389      NaN      NaN  NaN  10.4688    10.38   7.0872    8.65096   10.2432   

            8        9  ...      136      137     

In [None]:
import torch
# Number of time steps; presumably matches the time-step columns labelled
# 0..143 in the speed CSV preview — TODO confirm.
TOTAL_T_STEPS = 144


def snapshot(t, df=df, g_partition=g_partition):
    """Describe the readings of one time step as a sparse square matrix.

    Parameters
    ----------
    t : hashable
        Label of the time step to extract. If `t` is not a label of the frame
        but ``str(t)`` is (``read_csv`` produces string column labels), the
        string form is used.
    df : pandas.DataFrame
        Speed data carrying the labels `t`, ``"from_node_idx"`` and
        ``"to_node_idx"``. Either orientation is accepted: labels as columns
        (one segment per row), or labels on the index (one segment per
        column), which is how the filtering cell leaves the global `df`.
        The default is the `df` bound when this cell is executed.
    g_partition : graph
        Graph whose node count gives the side length of the matrix.

    Returns
    -------
    dict
        ``{"indices": (row, col), "values": data, "shape": (size, size)}``
        where `row`/`col` are lists of int positions and `data` the matching
        readings. Segments with a missing reading or a missing index are
        skipped.

    Raises
    ------
    KeyError
        If one of the three labels is absent from `df`.
    """
    # The labels live on the index when `df` is still transposed; selecting
    # them as columns would then raise KeyError.
    labels_on_index = (
        "from_node_idx" not in df.columns and "from_node_idx" in df.index
    )
    labels = df.index if labels_on_index else df.columns
    if t not in labels and str(t) in labels:
        t = str(t)

    wanted = [t, "from_node_idx", "to_node_idx"]
    df_t = df.loc[wanted].T if labels_on_index else df[wanted]
    df_t = df_t.dropna()

    # Ids unknown to the graph were mapped to NaN, which turns the index
    # columns into floats; after dropna they are whole numbers again.
    row = df_t["from_node_idx"].astype(int).tolist()
    col = df_t["to_node_idx"].astype(int).tolist()
    data = df_t[t].tolist()
    size = len(g_partition.nodes())

    return {"indices": (row, col), "values": data, "shape": (size, size)}


