In [1]:
import sys
import pandas
sys.path.append('../')

from src.graph_utils import partition_graph_by_lonlat
import networkx as nx
from jurbey.jurbey import JURBEY

with open("../data/1558537930325.jurbey", 'rb') as tempf:
    g = JURBEY.load(tempf.read())
print(g.number_of_nodes())
g_partition = partition_graph_by_lonlat(g)


290100


**Convert to edge-based graph**

In [2]:
import networkx as nx
L = nx.line_graph(nx.DiGraph(g_partition))

In [3]:
nodes = list(L.nodes())
g_partition[nodes[10][0]][nodes[10][1]]['data']

Arc(arcType=<ArcType.LANE_STRAIGHT: 'LANE_STRAIGHT'>, roadClass=<RoadClass.MajorRoad: 2>, roadAccessibility=<RoadAccessibility.NoRestriction: 1>, metadata={'bicycle': 'no', 'highway': 'primary', 'lanes': '4', 'lit': 'yes', 'maxspeed': '50', 'name': 'Bismarckstraße', 'oneway': 'yes', 'postal_code': '10625', 'ref': 'B 2;B 5', 'surface': 'asphalt', 'turn:lanes': 'through|through|through;right|right'}, signs=[], vehicleAccessibility=[], geometry=[GeoCoordinates(lon=13.3207077, lat=52.5123944, alt=nan), GeoCoordinates(lon=13.3207877, lat=52.5123711, alt=nan)])

**Extract dynamic (speed) + static features from nodes**

In [4]:
def arc_features(arc, speed=None):
    arc = g_partition[arc[0]][arc[1]]
    return [ 
        int(arc['speed']) if speed !=None else speed, 
        arc['data'].metadata.get('lanes', '1'),
        arc['data'].metadata['highway'],
        arc['data'].roadClass.name
    ]
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(handle_unknown='ignore')
def construct_features():
    data = list()
    for node in L.nodes:
        data.append(arc_features(node))
    return enc.fit_transform(data)
    
X = construct_features()
    

In [5]:
enc.categories_

[array([None], dtype=object),
 array(['1', '2', '3', '4', '5'], dtype=object),
 array(['access_ramp', 'corridor', 'living_street', 'platform', 'primary',
        'residential', 'secondary', 'secondary_link', 'service',
        'tertiary', 'tertiary_link', 'unclassified'], dtype=object),
 array(['DirtRoad', 'LocalRoad', 'MajorRoad'], dtype=object)]

In [6]:
X.shape

(6163, 21)

In [7]:
type(X)

scipy.sparse.csr.csr_matrix

**Preprocess adjacency matrix**

In [8]:
adj = nx.to_scipy_sparse_matrix(L, format="coo")
import scipy.sparse as sp
import numpy as np
import torch

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor."""
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse.FloatTensor(indices, values, shape)
                                    
def normalize(mx):
    """Row-normalize sparse matrix"""
    rowsum = np.array(mx.sum(1))
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    mx = r_mat_inv.dot(mx)
    return mx

# build symmetric adjacency matrix
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
                                    
adj = normalize(adj + sp.eye(adj.shape[0]))
adj = sparse_mx_to_torch_sparse_tensor(adj)
                                    
                                    

In [9]:
adj.shape

torch.Size([6163, 6163])

In [10]:
#Our speed data uses segment ids, but the model uses sequential indexes, based on `.nodes()`
import math
id_to_idx = {}
# defaultdict won't do what you expect in Pandas
df = pandas.read_csv("../data/timeseries_speed_april_first_week.csv")

id_to_idx = {}

for idx, id_ in enumerate(L.nodes()):
    id_to_idx[id_] = idx

df.shape

(6162, 147)

**Create rolling window tensor dataset**

In [11]:
import torch
import scipy.sparse
TOTAL_T_STEPS = 144

from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(handle_unknown='ignore')
def build_dataset_to_sparse_tensor(from_=0, to=TOTAL_T_STEPS):
    dataset = list()
    for t in range(from_, to):
        features_at_t = [[50, '4', 'primary', 'MajorRoad']] * len(L.nodes)
        for _, row in df.iterrows():
            arc = (row['from_node'], row['to_node'])
            features_at_t[id_to_idx[arc]] = arc_features(arc, speed=row[str(t)])        
        dataset.append(sparse_mx_to_torch_sparse_tensor(enc.fit_transform(features_at_t)))
    return torch.stack(dataset, dim=0)
    
def build_dataset_to_numpy_tensor(from_=0, to=TOTAL_T_STEPS):
    dataset = list()
    for t in range(from_, to):
        features_at_t = [[50, '4', 'primary', 'MajorRoad']] * len(L.nodes)
        for _, row in df.iterrows():
            arc = (row['from_node'], row['to_node'])
            features_at_t[id_to_idx[arc]] = arc_features(arc, speed=row[str(t)])        
        dataset.append(enc.fit_transform(features_at_t).toarray())
    return np.stack(dataset, axis=0)

Y = build_dataset_to_numpy_tensor()

In [12]:
Y.shape

(144, 6163, 27)

In [22]:
# serialize the sparse tensor dataset
torch.save(X.indices(), "../data/dataset_st_indices.pt", pickle_protocol=4)
torch.save(X.values(), "../data/dataset_st_values.pt", pickle_protocol=4)
torch.save(X.size(), "../data/dataset_st_size.pt", pickle_protocol=4)

RuntimeError: Cannot get indices on an uncoalesced tensor, please call .coalesce() first

In [13]:
X = np.moveaxis(Y, source=(0,1,2), destination=(2,0,1))
# num_vertices, num_features, num_timesteps
X.shape

(6163, 27, 144)

In [14]:
def generate_dataset(X, num_timesteps_input, num_timesteps_output):
    """
    Takes node features for the graph and divides them into multiple samples
    along the time-axis by sliding a window of size (num_timesteps_input+
    num_timesteps_output) across it in steps of 1.
    :param X: Node features of shape (num_vertices, num_features,
    num_timesteps)
    :return:
        - Node features divided into multiple samples. Shape is
          (num_samples, num_vertices, num_features, num_timesteps_input).
        - Node targets for the samples. Shape is
          (num_samples, num_vertices, num_features, num_timesteps_output).
    """
    # Generate the beginning index and the ending index of a sample, which
    # contains (num_points_for_training + num_points_for_predicting) points
    indices = [(i, i + (num_timesteps_input + num_timesteps_output)) for i
               in range(X.shape[2] - (
                num_timesteps_input + num_timesteps_output) + 1)]

    # Save samples
    features, target = [], []
    for i, j in indices:
        features.append(
            X[:, :, i: i + num_timesteps_input])
        target.append(X[:, :, i + num_timesteps_input: j])

    return torch.from_numpy(np.array(features)).permute(0,1,3,2), \
           torch.from_numpy(np.array(target)).permute(0,1,3,2)




def generate_dataset_concat(X, num_timesteps_input, num_timesteps_output):
    """
    Takes node features for the graph and divides them into multiple samples
    along the time-axis by sliding a window of size (num_timesteps_input+
    num_timesteps_output) across it in steps of 1.
    :param X: Node features of shape (num_vertices, num_features,
    num_timesteps)
    :return:
        - Node data (features + labels) divided into multiple samples. Shape is
          (num_samples, num_vertices, num_features, num_timesteps_input).
        
    """
    # Generate the beginning index and the ending index of a sample, which
    # contains (num_points_for_training + num_points_for_predicting) points
    indices = [(i, i + (num_timesteps_input + num_timesteps_output)) for i
               in range(X.shape[2] - (
                num_timesteps_input + num_timesteps_output) + 1)]

    # Save samples
    dataset = []
    for i, j in indices:
        dataset.append(X[:, :, i: j])

    return torch.from_numpy(np.array(dataset))

In [15]:
# training, validation, testing : 0.7, 0.1, 0.2
split_line1 = int(X.shape[2] * 0.7)
split_line2 = int(X.shape[2] * 0.8)
train_original_data = X[:, :, :split_line1]
val_original_data = X[:, :, split_line1:split_line2]
test_original_data = X[:, :, split_line2:]

look_back = 9
look_ahead = 3
# num_samples, num_nodes, num_timesteps, num_features
training_data = generate_dataset_concat(train_original_data,
                                                       num_timesteps_input=look_back,
                                                       num_timesteps_output=look_ahead)
valid_data = generate_dataset_concat(val_original_data,
                                             num_timesteps_input=look_back,
                                             num_timesteps_output=look_ahead)
test_data = generate_dataset_concat(test_original_data,
                                               num_timesteps_input=look_back,
                                               num_timesteps_output=look_ahead)

print(f"shape of training: {training_data.shape}")
print(f"shape of validation: {valid_data.shape}")
print(f"shape of testing: {test_data.shape}")

shape of training: torch.Size([89, 6163, 27, 12])
shape of validation: torch.Size([4, 6163, 27, 12])
shape of testing: torch.Size([18, 6163, 27, 12])


In [16]:
data = {}
data['train'] = (training_data)
data['valid'] = (valid_data)
data['test'] = (test_data)

**Now start training**

In [17]:
from test_tube import Experiment 
from pytorch_lightning import Trainer
import os

# PyTorch summarywriter with a few bells and whistles    
exp = Experiment(save_dir=os.getcwd())

# pass in experiment for automatic tensorboard logging.    
trainer = Trainer(experiment=exp, max_nb_epochs=100, train_percent_check=1)



gpu available: False, used: False


In [24]:
%reload_ext autoreload
%autoreload 2

In [29]:
import imp  
import src.tgcn.temporal_spatial_model as l
l = imp.reload(l)
TGCN = l.TGCN
model = TGCN(gcn_in=27, gcn_out=32, adj=adj, datasets=data)
trainer.fit(model)

[autoreload of src.tgcn.layers.tempcells failed: Traceback (most recent call last):
  File "/Users/nguytu3/anaconda3/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "/Users/nguytu3/anaconda3/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 450, in superreload
    update_generic(old_obj, new_obj)
  File "/Users/nguytu3/anaconda3/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 387, in update_generic
    update(a, b)
  File "/Users/nguytu3/anaconda3/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 357, in update_class
    update_instances(old, new)
  File "/Users/nguytu3/anaconda3/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 317, in update_instances
    update_instances(old, new, obj, visited)
  File "/Users/nguytu3/anaconda3/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 300, in update_instances
    for

               Name              Type  Params
0               net               GCN    3008
1           net.gc1  GraphConvolution     896
2           net.gc2  GraphConvolution    1056
3        net.linear            Linear    1056
4       net.dropout           Dropout       0
5             model         TempCells  220275
6  model.lstm_cell1          LSTMCell    8448
7  model.lstm_cell2          LSTMCell    8448
8      model.linear            Linear  203379
shape of gcn_embs: torch.Size([6163, 32, 9])


RuntimeError: input has inconsistent input_size: got 1, expected 32