In [1]:
import sys
import pandas
sys.path.append('../')

from src.graph_utils import partition_graph_by_lonlat
import networkx as nx
from jurbey.jurbey import JURBEY

# Read the serialized Berlin graph as raw bytes and deserialize it with JURBEY.load.
with open("../data/berlin.jurbey", 'rb') as tempf:
    g = JURBEY.load(tempf.read())
# Sanity check: report the node count of the full graph.
print(g.number_of_nodes())
# Partition the graph by lon/lat. Presumably this keeps only a geographic
# sub-region of the full graph -- TODO confirm against src.graph_utils.
g_partition = partition_graph_by_lonlat(g)


290100


**Convert to edge-based graph**

In [2]:
import networkx as nx
# Build the line graph of the (directed) partition: every node of L is an
# edge (u, v) of g_partition, so features can be attached per road arc.
L = nx.line_graph(nx.DiGraph(g_partition))

In [3]:
# Each line-graph node is an edge tuple of g_partition; look up the payload
# stored under the 'data' key for one arbitrary sample node to inspect it.
nodes = list(L.nodes())
g_partition[nodes[10][0]][nodes[10][1]]['data']

Arc(arcType=<ArcType.LANE_STRAIGHT: 'LANE_STRAIGHT'>, roadClass=<RoadClass.MajorRoad: 2>, roadAccessibility=<RoadAccessibility.NoRestriction: 1>, metadata={'bicycle': 'no', 'highway': 'primary', 'lanes': '4', 'lit': 'yes', 'maxspeed': '50', 'name': 'Bismarckstraße', 'oneway': 'yes', 'postal_code': '10625', 'ref': 'B 2;B 5', 'surface': 'asphalt', 'turn:lanes': 'through|through|through;right|right'}, signs=[], vehicleAccessibility=[], geometry=[GeoCoordinates(lon=13.3207077, lat=52.5123944, alt=nan), GeoCoordinates(lon=13.3207877, lat=52.5123711, alt=nan)])

**Extract dynamic (speed) + static features from nodes**

In [89]:
def arc_features(arc, speed=None):
    """Build the raw (not yet encoded) feature row for one road arc.

    Args:
        arc: ``(from_node, to_node)`` pair identifying an edge of the
            module-level ``g_partition`` graph.
        speed: Optional dynamic speed observation for this arc. When given,
            it is converted with ``int()``; when omitted, the speed slot of
            the returned row is ``None``.

    Returns:
        list: ``[speed, lanes, highway, road_class_name]`` where ``lanes``
        falls back to ``'1'`` if the arc metadata has no ``'lanes'`` entry.

    Raises:
        KeyError: if the arc is not an edge of ``g_partition`` or its
            metadata has no ``'highway'`` entry.
        ValueError: if ``speed`` is NaN (``int()`` cannot convert it).
    """
    edge = g_partition[arc[0]][arc[1]]
    arc_data = edge['data']
    return [
        # Use the speed that was passed in: previously the argument was only
        # tested against None and the value was read from the graph edge, so
        # the per-timestep observation never reached the feature row.
        int(speed) if speed is not None else None,
        arc_data.metadata.get('lanes', '1'),
        arc_data.metadata['highway'],
        arc_data.roadClass.name,
    ]
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(handle_unknown='ignore')
def construct_features():
    """One-hot encode the static features of every line-graph node.

    Gathers ``arc_features(node)`` for each node of ``L`` (in ``L.nodes``
    order), fits the module-level encoder ``enc`` on those rows and returns
    the encoded result.
    """
    rows = [arc_features(edge_id) for edge_id in L.nodes]
    encoded = enc.fit_transform(rows)
    return encoded
    
X = construct_features()
    

scipy.sparse.csr.csr_matrix

In [5]:
enc.categories_

[array([5, 10, 15, 20, 25, 30, 50], dtype=object),
 array(['1', '2', '3', '4', '5'], dtype=object),
 array(['access_ramp', 'corridor', 'living_street', 'platform', 'primary',
        'residential', 'secondary', 'secondary_link', 'service',
        'tertiary', 'tertiary_link', 'unclassified'], dtype=object),
 array(['DirtRoad', 'LocalRoad', 'MajorRoad'], dtype=object)]

In [6]:
X.shape

(6163, 27)

In [61]:
type(X)

scipy.sparse.csr.csr_matrix

**Preprocess adjacency matrix**

In [7]:
# Adjacency matrix of the line graph as a scipy COO sparse matrix.
# NOTE(review): to_scipy_sparse_matrix is not available in newer NetworkX
# releases (presumably superseded by to_scipy_sparse_array) -- confirm
# against the pinned NetworkX version.
adj = nx.to_scipy_sparse_matrix(L, format="coo")
import scipy.sparse as sp
import numpy as np
import torch

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a float32 torch sparse COO tensor.

    Args:
        sparse_mx: Any scipy sparse matrix; it is converted to COO format and
            its values are cast to ``float32``.

    Returns:
        A sparse COO ``torch.Tensor`` with the same shape as ``sparse_mx``,
        ``int64`` indices and ``float32`` values.
    """
    coo = sparse_mx.tocoo().astype(np.float32)
    # Stack row and column coordinates into the (2, nnz) index layout torch expects.
    indices = torch.from_numpy(np.vstack((coo.row, coo.col)).astype(np.int64))
    values = torch.from_numpy(coo.data)
    # torch.sparse_coo_tensor replaces the deprecated legacy constructor
    # torch.sparse.FloatTensor(indices, values, shape).
    return torch.sparse_coo_tensor(indices, values, torch.Size(coo.shape))
                                    
def normalize(mx):
    """Row-normalize a sparse matrix so every non-empty row sums to 1.

    Args:
        mx: A scipy sparse matrix. Integer and boolean matrices are
            accepted; their row sums are promoted to float before inversion.

    Returns:
        The sparse product ``diag(1 / rowsum) @ mx``. Rows whose sum is zero
        are left as all zeros rather than producing inf/NaN.
    """
    rowsum = np.asarray(mx.sum(1)).flatten()
    # np.power refuses negative powers of integer arrays, so promote first.
    if not np.issubdtype(rowsum.dtype, np.inexact):
        rowsum = rowsum.astype(np.float64)
    # Zero row sums yield inf here; that is expected and zeroed out just
    # below, so the divide-by-zero warning is silenced.
    with np.errstate(divide='ignore'):
        r_inv = np.power(rowsum, -1)
    r_inv[np.isinf(r_inv)] = 0.
    return sp.diags(r_inv).dot(mx)

# Build a symmetric adjacency matrix: wherever the transposed entry is larger
# than the original entry, the original is replaced by the transposed one, so
# positions (i, j) and (j, i) both end up with the larger of the two values.
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

# Add self-loops (identity), row-normalize, then convert to a torch sparse tensor.
adj = normalize(adj + sp.eye(adj.shape[0]))
adj = sparse_mx_to_torch_sparse_tensor(adj)
                                    
                                    

In [8]:
adj.shape

torch.Size([6163, 6163])

In [57]:
# The speed data is keyed by segment id, whereas the model addresses segments
# by their sequential position in `L.nodes()`.
import math

df = pandas.read_csv("../data/timeseries_speed_april_first_week.csv")

# Map every segment id to its sequential index. A plain dict is used on
# purpose: defaultdict won't do what you expect in Pandas.
id_to_idx = {}
for position, segment_id in enumerate(L.nodes()):
    id_to_idx[segment_id] = position

df.shape

(6162, 147)

**Create rolling window tensor dataset**

In [99]:
import torch
import scipy.sparse
TOTAL_T_STEPS = 144

from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(handle_unknown='ignore')
def build_dataset(from_=0, to=TOTAL_T_STEPS):
    """Build one one-hot encoded feature matrix per time step.

    For every step ``t`` in ``range(from_, to)`` a feature row is produced
    for each node of ``L``: arcs listed in ``df`` get
    ``arc_features(arc, speed=<value of column str(t)>)``, all other nodes
    keep a fixed placeholder row.

    The module-level encoder ``enc`` is fitted once on the rows of all
    requested steps, so every returned matrix shares the same column layout.
    Fitting separately per step would let the learned categories (and hence
    the matrix width and column meaning) differ between steps.

    Args:
        from_: First time step (inclusive); must be a column name of ``df``
            once converted with ``str()``.
        to: Last time step (exclusive).

    Returns:
        list: One encoded matrix per time step, in step order; an empty list
        when the range is empty.

    Raises:
        KeyError: if an arc from ``df`` is missing in ``id_to_idx`` or a
            time-step column is missing in ``df``.
    """
    placeholder = [50, '4', 'primary', 'MajorRoad']
    n_nodes = len(L.nodes)

    # Arc ids and their row positions do not depend on the time step, so
    # they are resolved once instead of once per step.
    arcs = list(zip(df['from_node'], df['to_node']))
    positions = [id_to_idx[arc] for arc in arcs]

    raw_steps = []
    for t in range(from_, to):
        # Sharing one placeholder object is safe: rows are replaced, never mutated.
        features_at_t = [placeholder] * n_nodes
        for arc, position, speed in zip(arcs, positions, df[str(t)]):
            features_at_t[position] = arc_features(arc, speed=speed)
        raw_steps.append(features_at_t)

    if not raw_steps:
        return []

    # Learn the categories from every step before encoding any of them.
    enc.fit([row for step in raw_steps for row in step])
    return [enc.transform(step) for step in raw_steps]
    
X = build_dataset()

In [11]:
# Combine the per-step matrices returned by build_dataset into one scipy
# sparse matrix.
# NOTE(review): scipy.sparse.bmat expects a 2-D grid of blocks, whereas
# build_dataset returns a flat list -- confirm this call does what is intended.
# NOTE(review): the output recorded for this cell is a torch.Size, which the
# .shape of a scipy matrix would not produce, so it appears to stem from a
# different version of this cell.
X = scipy.sparse.bmat(X)
X.shape

torch.Size([144, 6163, 6163])

In [29]:
# Dataset splits handed to the model.
# NOTE(review): `dataset` is not assigned in any cell visible in this notebook;
# this cell relies on kernel state created elsewhere -- confirm where it comes from.
# NOTE(review): train, valid and test all reference the same object, so
# validation and test would presumably be evaluated on the training data.
data = {}
data['train'] = dataset
data['valid'] = dataset
data['test'] = dataset
dataset.shape

torch.Size([144, 6163, 6163])

In [33]:
def _tensor_rolling_window(dataset, window_size, step_size=1):
        # unfold dimension to make our rolling window
        return dataset.unfold(0, window_size, step_size)

a = _tensor_rolling_window(dataset=dataset, window_size=10)

In [40]:
a.shape

torch.Size([135, 6163, 6163, 10])

**Now start training**

In [14]:
from test_tube import Experiment 
from pytorch_lightning import Trainer
import os

# test_tube Experiment: a PyTorch SummaryWriter with a few bells and whistles.
# It is given the current working directory as its save directory.
exp = Experiment(save_dir=os.getcwd())

# Pass in the experiment for automatic tensorboard logging.
# max_nb_epochs=100 presumably caps training at 100 epochs and
# train_percent_check=1 presumably uses the full training set each epoch --
# TODO confirm against the installed pytorch_lightning version.
trainer = Trainer(experiment=exp, max_nb_epochs=100, train_percent_check=1)



gpu available: False, used: False


In [52]:
# Reload the model module so edits to its source are picked up without
# restarting the kernel. importlib.reload replaces imp.reload: the imp module
# is deprecated and no longer exists in Python 3.12+.
import importlib
import src.tgcn.temporal_spatial_model as l
l = importlib.reload(l)
TGCN = l.TGCN

# NOTE(review): the recorded run of this cell ends in
# "RuntimeError: size mismatch, m1: [6163 x 6163], m2: [32 x 6163]".
# Presumably gcn_in has to match the per-node feature width of the dataset --
# TODO confirm against the TGCN definition.
model = TGCN(gcn_in=1, gcn_out=32, adj=adj, datasets=data)
trainer.fit(model)

  0%|          | 0/136 [00:00<?, ?it/s]

           Name              Type  Params
0           net               GCN  604006
1       net.gc1  GraphConvolution  197248
2       net.gc2  GraphConvolution  203379
3    net.linear            Linear  203379
4   net.dropout           Dropout       0
5         model             LSTMs   15477
6    model.lstm              LSTM    8448
7  model.linear            Linear    7029


  1%|          | 1/136 [00:03<08:04,  3.59s/it]

RuntimeError: size mismatch, m1: [6163 x 6163], m2: [32 x 6163] at ../aten/src/TH/generic/THTensorMath.cpp:752