In [1]:
import sys
import pandas
sys.path.append('../')

from src.graph_utils import partition_graph_by_lonlat
import networkx as nx
from jurbey.jurbey import JURBEY

# Read the serialized JURBEY road graph from disk and report how many nodes it has.
with open("../data/1558537930325.jurbey", 'rb') as jurbey_file:
    raw_bytes = jurbey_file.read()
    g = JURBEY.load(raw_bytes)
print(g.number_of_nodes())
# Partition the graph with the project helper (called with its default arguments).
g_partition = partition_graph_by_lonlat(g)


290100


**Convert to edge-based graph**

In [2]:
import networkx as nx
# Edge-based view of the road network: in the line graph every directed arc
# (from_node, to_node) of the partitioned graph becomes a node.
L = nx.line_graph(nx.DiGraph(g_partition))

In [3]:
# Materialize the line-graph nodes; each one is a (from_node, to_node) pair of the original graph.
nodes = list(L.nodes())
# Look up the arc payload stored under 'data' for one sample edge.
g_partition[nodes[10][0]][nodes[10][1]]['data']

Arc(arcType=<ArcType.LANE_STRAIGHT: 'LANE_STRAIGHT'>, roadClass=<RoadClass.MajorRoad: 2>, roadAccessibility=<RoadAccessibility.NoRestriction: 1>, metadata={'bicycle': 'no', 'highway': 'primary', 'lanes': '4', 'lit': 'yes', 'maxspeed': '50', 'name': 'Bismarckstraße', 'oneway': 'yes', 'postal_code': '10625', 'ref': 'B 2;B 5', 'surface': 'asphalt', 'turn:lanes': 'through|through|through;right|right'}, signs=[], vehicleAccessibility=[], geometry=[GeoCoordinates(lon=13.3207077, lat=52.5123944, alt=nan), GeoCoordinates(lon=13.3207877, lat=52.5123711, alt=nan)])

**Extract dynamic (speed) + static features from nodes**

In [93]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler
# Shared one-hot encoder for the arc attributes; categories unseen at fit time are ignored instead of raising.
enc = OneHotEncoder(handle_unknown='ignore')
# Shared scaler; used further down to standardize the speed column.
scaler = StandardScaler()
def arc_features(arc):
    """Return the static feature row for one road arc.

    :param arc: ``(from_node, to_node)`` pair identifying an edge of
        ``g_partition``.
    :return: list ``[maxspeed, lanes, highway, surface, roadClass,
        roadAccessibility]``. ``maxspeed`` is converted to float (default
        ``'50'``); ``lanes`` (default ``'1'``), ``highway`` and ``surface``
        (default ``'no_sur'``) are taken verbatim from the arc metadata; the
        last two entries are the enum member names.
    :raises KeyError: if the arc metadata has no ``'highway'`` entry.
    """
    from_node, to_node = arc[0], arc[1]
    payload = g_partition[from_node][to_node]['data']
    tags = payload.metadata

    max_speed = float(tags.get('maxspeed', '50'))
    lane_count = tags.get('lanes', '1')
    highway_kind = tags['highway']
    surface_kind = tags.get('surface', 'no_sur')

    return [max_speed, lane_count, highway_kind, surface_kind,
            payload.roadClass.name, payload.roadAccessibility.name]
def construct_features():
    """One-hot encode the static features of every line-graph node.

    Collects one ``arc_features`` row per node of ``L`` (in ``L.nodes``
    order), fits the module-level ``enc`` on those rows and returns the
    encoded matrix produced by ``enc.fit_transform``.
    """
    rows = [arc_features(node) for node in L.nodes]
    return enc.fit_transform(rows)

X = construct_features()
  

In [94]:
enc.categories_

[array([5.0, 10.0, 20.0, 30.0, 50.0], dtype=object),
 array(['1', '2', '3', '4', '5'], dtype=object),
 array(['access_ramp', 'corridor', 'living_street', 'platform', 'primary',
        'residential', 'secondary', 'secondary_link', 'service',
        'tertiary', 'tertiary_link', 'unclassified'], dtype=object),
 array(['asphalt', 'cobblestone', 'cobblestone:flattened', 'concrete',
        'concrete:plates', 'grass_paver', 'no_sur', 'paved',
        'paving_stones', 'sett'], dtype=object),
 array(['DirtRoad', 'LocalRoad', 'MajorRoad'], dtype=object),
 array(['NoRestriction'], dtype=object)]

In [6]:
X.shape

(6163, 25)

In [7]:
type(X)

scipy.sparse.csr.csr_matrix

**Preprocess adjacency matrix**

In [8]:
# Adjacency of the line graph as a scipy COO matrix.
# NOTE(review): to_scipy_sparse_matrix is not available in recent networkx releases
# (to_scipy_sparse_array replaces it) — confirm against the pinned version.
adj = nx.to_scipy_sparse_matrix(L, format="coo")
import scipy.sparse as sp
import numpy as np
import torch

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a float32 torch sparse COO tensor.

    :param sparse_mx: any scipy sparse matrix (it is converted to COO first).
    :return: sparse ``torch`` tensor with the same shape and non-zero
        entries, values stored as ``float32`` and indices as ``int64``.
    """
    coo = sparse_mx.tocoo().astype(np.float32)
    # torch expects a (2, nnz) int64 index array: first row = row ids, second row = column ids.
    indices = torch.from_numpy(np.vstack((coo.row, coo.col)).astype(np.int64))
    values = torch.from_numpy(coo.data)
    shape = torch.Size(coo.shape)
    # torch.sparse.FloatTensor(...) is a legacy constructor; sparse_coo_tensor is
    # the supported factory and yields the same float32 sparse tensor here.
    return torch.sparse_coo_tensor(indices, values, shape)
                                    
def normalize(mx):
    """Row-normalize a sparse matrix so that every non-empty row sums to 1.

    Rows whose sum is zero are left as all zeros.

    :param mx: scipy sparse matrix (integer or floating dtype).
    :return: sparse matrix ``D^-1 * mx`` where ``D`` is the diagonal matrix
        of row sums.
    """
    rowsum = np.asarray(mx.sum(1)).flatten()
    # Integer inputs cannot be raised to a negative power by numpy
    # ("Integers to negative integer powers are not allowed"), so promote them.
    if not np.issubdtype(rowsum.dtype, np.floating):
        rowsum = rowsum.astype(np.float64)
    # Empty rows give 1/0 = inf; that is expected and zeroed right below,
    # so silence the divide-by-zero warning instead of letting it leak out.
    with np.errstate(divide='ignore'):
        r_inv = np.power(rowsum, -1)
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    return r_mat_inv.dot(mx)

# Build a symmetric adjacency matrix: wherever the transposed entry is larger
# than the original one, use the transposed entry instead.
adj_transposed = adj.T
transposed_is_larger = adj_transposed > adj
adj = adj + adj_transposed.multiply(transposed_is_larger) - adj.multiply(transposed_is_larger)

# Add self-loops, row-normalize, then convert to a torch sparse tensor.
adj_with_self_loops = adj + sp.eye(adj.shape[0])
adj = sparse_mx_to_torch_sparse_tensor(normalize(adj_with_self_loops))
                                    
                                    

In [9]:
adj.shape

torch.Size([6163, 6163])

In [78]:
# Our speed data uses segment ids, but the model uses sequential indexes, based on `.nodes()`
import math
id_to_idx = {}
# defaultdict won't do what you expect in Pandas
# Load the speed time series and transpose it, so every column is one CSV row.
# The next cell transposes `df` back, so keep the transposed frame bound to `df`.
df = pandas.read_csv("../data/timeseries_speed_april_first_week.csv").T
# Per column of the transposed frame: is less than half of it missing?
l = (df.isnull().mean() < 0.5).tolist()

# Positions of the columns that pass the coverage check.
indices = [position for position, mostly_present in enumerate(l) if mostly_present]
print(indices)

[4, 7, 16, 124, 157, 204, 351, 352, 379, 389, 430, 487, 492, 513, 539, 551, 568, 569, 587, 613, 630, 639, 649, 675, 680, 698, 711, 713, 780, 782, 784, 865, 1037, 1062, 1070, 1079, 1080, 1088, 1091, 1125, 1206, 1215, 1217, 1238, 1241, 1397, 1400, 1450, 1517, 1550, 1556, 1560, 1564, 1574, 1703, 1733, 1756, 1804, 1805, 1806, 1807, 1819, 1824, 1853, 1907, 1917, 1964, 1996, 1999, 2003, 2007, 2029, 2030, 2033, 2036, 2037, 2040, 2042, 2098, 2099, 2102, 2104, 2116, 2119, 2123, 2128, 2137, 2143, 2210, 2261, 2281, 2284, 2289, 2358, 2382, 2387, 2394, 2406, 2411, 2412, 2473, 2487, 2550, 2568, 2604, 2605, 2649, 2657, 2674, 2677, 2738, 2753, 2766, 2788, 2794, 2809, 2851, 2853, 2869, 2871, 2917, 2925, 2956, 3081, 3086, 3087, 3088, 3090, 3117, 3120, 3132, 3223, 3264, 3450, 3458, 3459, 3530, 3577, 3587, 3627, 3629, 3645, 3728, 3792, 3870, 3974, 3984, 3993, 4006, 4064, 4065, 4083, 4084, 4303, 4445, 4448, 4550, 4551, 4572, 4602, 4655, 4673, 4688, 4698, 4700, 4704, 4798, 4814, 4866, 4889, 4953, 4967, 4992

In [79]:
# Map every line-graph node (from_node, to_node) to its position in L.nodes();
# the feature arrays built later are indexed by this position.
id_to_idx = {id_: idx for idx, id_ in enumerate(L.nodes())}

# Undo the transposition left behind by the previous cell: rows are CSV rows again.
df = df.T
df = df.loc[:, df.columns != 'Unnamed: 0']
# DataFrame.convert_objects(convert_numeric=True) is deprecated (see the warning in
# this cell's old output) and absent from current pandas; pandas.to_numeric with
# errors='coerce' is the documented replacement (unparsable values become NaN).
df = df.apply(pandas.to_numeric, errors='coerce')


df2 = df['from_node']
df3 = df['to_node']

# Drop BOTH id columns before filling. The previous code sliced `df` twice, so the
# second slice discarded the first and 'from_node' stayed in the frame, where it
# was interpolated/backfilled as if it were one more time step.
df_filled = df.loc[:, ~df.columns.isin(['from_node', 'to_node'])]


# Work on the transposed frame: rows are time steps, columns are CSV rows.
df_filled = df_filled.T

# NOTE(review): on this transposed frame axis=1 runs across the columns (the CSV
# rows), not along the time steps — confirm this is the intended direction.
df_filled = df_filled.interpolate(method='nearest', axis=1)

# Fill what is left along the time steps. bfill() is the non-deprecated spelling of
# fillna(method='backfill'); ffill() then covers trailing gaps, which used to be
# overwritten by the leaked 'from_node' row.
df_filled = df_filled.bfill().ffill()
df_filled = df_filled.T
df_filled['from_node'] = df2
df_filled['to_node'] = df3

print(df_filled[0:10])


For all other conversions use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  import sys


           0          1          2          3          4          5  \
0  10.854457  10.854457  10.854457  10.854457   5.680978   5.680978   
1  10.854457  10.854457  10.854457  10.854457   5.680978   5.680978   
2  10.854457  10.854457  10.854457  10.854457  10.486210  10.486210   
3  10.468811  10.468811  10.468811  10.468811  10.486210  10.486210   
4  10.468811  10.468811  10.468811  10.468811  10.636621  10.288534   
5  10.468811  10.468811  10.468811  10.468811  10.636621  10.288534   
6  10.854457  10.854457  10.854457  10.854457  10.831409   5.680978   
7  10.278846  10.854457  10.854457  10.854457  10.831409   5.680978   
8  10.278846  10.854457  10.854457  10.854457  10.831409  10.298252   
9  10.278846  10.854457  10.854457  10.854457  10.831409  10.298252   

           6          7          8          9  ...        136        137  \
0   7.036838   1.499997  10.088634  10.587459  ...  10.581123  10.850843   
1   7.036838   1.499997  10.088634  10.587459  ...  10.581123  10.

In [80]:
df[0:10]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,136,137,138,139,140,141,142,143,from_node,to_node
0,,,,10.854457,,5.680978,7.036838,1.499997,10.088634,10.587459,...,10.581123,,,,,,10.850843,10.036408,628154368,1023689595
1,,,,10.854457,,5.680978,7.036838,1.499997,10.088634,10.587459,...,10.581123,,,,,,10.850843,10.036408,628154368,1023689595
2,,,,,,10.48621,,,10.937218,,...,,,,,,,,,527147009,27537239
3,,,,,,,,,,,...,,,,,,,,,527147009,26908815
4,,,,10.468811,10.636621,10.288534,10.617513,1.758539,3.404401,10.224568,...,6.098767,10.257657,10.586166,,10.669476,4.313417,10.392901,10.51831,628154370,3804638178
5,,,,,,,10.286999,,,10.83357,...,10.537584,10.286999,,,,7.238553,10.497979,10.491733,628154372,26938222
6,,,,10.854457,,5.680978,7.036838,1.499997,5.044317,10.587459,...,0.961866,,,,,,10.850843,10.036408,628154375,1560866145
7,10.278846,,,,10.831409,,3.140737,2.073876,0.385263,4.10448,...,10.505827,10.517314,,,,10.189923,10.43675,4.196431,5791596551,1321327852
8,,,,,,10.298252,1.220956,10.298252,,,...,,,,,,,,,5791621141,4782446443
9,,,,,,,,,,,...,,,,,,,,,5791621141,26875019


**Create rolling window tensor dataset**

In [81]:
import torch
import scipy.sparse
# Number of time-step columns ('0' .. '143') in the speed frame.
TOTAL_T_STEPS = 144

from sklearn.preprocessing import OneHotEncoder
# Fresh encoder for the dataset builders below; this rebinds the `enc` created
# in the feature-extraction cell.
enc = OneHotEncoder(handle_unknown='ignore')
def build_dataset_to_sparse_tensor(from_=0, to=TOTAL_T_STEPS):
    """Build one one-hot encoded sparse tensor per time step and stack them.

    Reads the module-level ``df``, ``L``, ``id_to_idx`` and ``enc``.

    NOTE(review): this function is not called anywhere in the notebook and
    does not look runnable as written:
      * ``arc_features`` as defined in this notebook accepts only ``arc``,
        so the ``speed=`` keyword below raises ``TypeError``;
      * the placeholder row has 4 fields while ``arc_features`` returns 6;
      * ``enc`` is refit for every time step, so the encoded width is not
        guaranteed to be the same across time steps.

    :param from_: first time step (inclusive).
    :param to: last time step (exclusive).
    :return: the per-time-step sparse tensors stacked along dim 0.
    """
    dataset = list()
    for t in range(from_, to):
        # Placeholder feature row for every node; rows present in df overwrite it below.
        features_at_t = [[50, '4', 'primary', 'MajorRoad']] * len(L.nodes)
        for _, row in df.iterrows():
            arc = (row['from_node'], row['to_node'])
            features_at_t[id_to_idx[arc]] = arc_features(arc, speed=row[str(t)])        
        dataset.append(sparse_mx_to_torch_sparse_tensor(enc.fit_transform(features_at_t)))
    return torch.stack(dataset, dim=0)
    
def build_dataset_to_numpy_tensor(from_=0, to=TOTAL_T_STEPS, df=None):
    """Build a dense ``(time, node, feature)`` array of per-node features.

    For every time step the feature row of a node is the standardized speed
    (column 0) followed by the one-hot encoding of its static arc features.
    Nodes of ``L`` without a row in ``df`` get a placeholder speed of 50 and
    a placeholder static row.

    Uses the module-level ``L``, ``id_to_idx``, ``arc_features``, ``enc`` and
    ``scaler``; ``enc`` is refit once and ``scaler`` once per time step (each
    time step is standardized independently, as before).

    :param from_: first time step (inclusive).
    :param to: last time step (exclusive).
    :param df: frame with ``'from_node'``, ``'to_node'`` and one column per
        time step named ``str(t)``. Required.
    :return: array of shape ``(to - from_, len(L.nodes), 1 + n_one_hot)``.
    :raises ValueError: if ``df`` is not given.
    :raises KeyError: if a ``(from_node, to_node)`` pair is not in ``id_to_idx``.
    """
    if df is None:
        raise ValueError("build_dataset_to_numpy_tensor() needs a speed DataFrame passed as `df`")

    num_nodes = len(L.nodes)

    # The static features do not depend on the time step, so resolve the node
    # index of every df row and encode the categorical block once.
    # The placeholder row has the same six fields, in the same order, as
    # arc_features(); the former 4-field placeholder produced a ragged input
    # for the encoder whenever a node had no row in df.
    cat_features = [[50.0, '4', 'primary', 'no_sur', 'MajorRoad', 'NoRestriction']
                    for _ in range(num_nodes)]
    row_node_idx = []
    for from_node, to_node in zip(df['from_node'], df['to_node']):
        arc = (from_node, to_node)
        node_idx = id_to_idx[arc]
        cat_features[node_idx] = arc_features(arc)
        row_node_idx.append(node_idx)
    cat_encoded = enc.fit_transform(cat_features).toarray()

    dataset = list()
    for t in range(from_, to):
        speed_features_at_t = np.full(num_nodes, 50.0)
        # Explicit loop (not fancy-index assignment) so that, when an arc occurs in
        # several rows, the last row wins exactly as in a row-by-row scan.
        for node_idx, speed in zip(row_node_idx, df[str(t)]):
            speed_features_at_t[node_idx] = speed
        scaled_speed = scaler.fit_transform(speed_features_at_t.reshape(-1, 1))
        dataset.append(np.concatenate([scaled_speed, cat_encoded], axis=1))
    return np.stack(dataset, axis=0)

# Y is built from the converted speed frame, Y_filled from the gap-filled one.
Y = build_dataset_to_numpy_tensor(df=df)
Y_filled = build_dataset_to_numpy_tensor(df=df_filled)

In [13]:
Y.shape

(144, 6163, 26)

In [14]:
# serialize the sparse tensor dataset
# torch.save(X.indices(), "../data/dataset_st_indices.pt", pickle_protocol=4)
# torch.save(X.values(), "../data/dataset_st_values.pt", pickle_protocol=4)
# torch.save(X.size(), "../data/dataset_st_size.pt", pickle_protocol=4)

In [82]:
# Reorder the axes from (num_timesteps, num_vertices, num_features)
# to (num_vertices, num_features, num_timesteps).
VERTEX_FEATURE_TIME = (1, 2, 0)
X = np.transpose(Y, VERTEX_FEATURE_TIME)
X_filled = np.transpose(Y_filled, VERTEX_FEATURE_TIME)


# num_vertices, num_features, num_timesteps
X.shape

(6163, 26, 144)

In [83]:
# Build mask tensor: True where X holds a value, False where X is NaN
observed = ~np.isnan(X)
X_masked = torch.from_numpy(observed)
print(X.shape)
print(X_masked.shape)

(6163, 26, 144)
torch.Size([6163, 26, 144])


In [84]:
def generate_dataset(X, num_timesteps_input, num_timesteps_output):
    """
    Cut the node features into (input, target) samples by sliding a window
    of length (num_timesteps_input + num_timesteps_output) along the time
    axis in steps of 1.
    :param X: Node features, numpy array of shape (num_vertices,
    num_features, num_timesteps)
    :param num_timesteps_input: number of leading steps of each window used
    as model input
    :param num_timesteps_output: number of trailing steps of each window used
    as target
    :return:
        - Input features per sample. Shape is
          (num_samples, num_vertices, num_timesteps_input, num_features).
        - Targets per sample. Shape is
          (num_samples, num_vertices, num_timesteps_output, num_features).
    """
    window = num_timesteps_input + num_timesteps_output
    num_samples = X.shape[2] - window + 1

    features, target = [], []
    for start in range(num_samples):
        boundary = start + num_timesteps_input
        # The first part of the window is the input, the remainder the target.
        features.append(X[:, :, start:boundary])
        target.append(X[:, :, boundary:start + window])

    feature_tensor = torch.from_numpy(np.array(features))
    target_tensor = torch.from_numpy(np.array(target))
    # Swap the last two axes so time comes before features.
    return feature_tensor.permute(0, 1, 3, 2), target_tensor.permute(0, 1, 3, 2)




def generate_dataset_concat(X, X_masked, num_timesteps_input, num_timesteps_output):
    """
    Cut the node data and its mask into samples by sliding a window of
    length (num_timesteps_input + num_timesteps_output) along the time axis
    in steps of 1. Inputs and labels stay together in one window.
    :param X: Node features, numpy array of shape (num_vertices,
    num_features, num_timesteps)
    :param X_masked: torch tensor with the same layout as X; it is sliced
    with the same windows
    :param num_timesteps_input: number of input steps per window
    :param num_timesteps_output: number of label steps per window
    :return:
        - Node data (features + labels) per sample. Shape is
          (num_samples, num_vertices, num_features,
          num_timesteps_input + num_timesteps_output).
        - The matching mask windows, stacked to the same shape.
    """
    window = num_timesteps_input + num_timesteps_output
    starts = range(X.shape[2] - window + 1)

    dataset = [X[:, :, start:start + window] for start in starts]
    mask = [X_masked[:, :, start:start + window] for start in starts]

    return torch.from_numpy(np.array(dataset)), torch.stack(mask)

In [85]:
# training, validation, testing : 0.7, 0.2, 0.1 of the time axis
# (the cut points sit at 70% and 90% of the time steps)
num_timesteps = X.shape[2]
split_line1 = int(num_timesteps * 0.7)
split_line2 = int(num_timesteps * 0.9)
train_span = slice(None, split_line1)
val_span = slice(split_line1, split_line2)
test_span = slice(split_line2, None)

# Model inputs come from the gap-filled data ...
train_original_data = X_filled[:, :, train_span]
val_original_data = X_filled[:, :, val_span]
test_original_data = X_filled[:, :, test_span]

# ... while the masks come from the NaN pattern of the unfilled data.
train_mask = X_masked[:, :, train_span]
valid_mask = X_masked[:, :, val_span]
test_mask = X_masked[:, :, test_span]


look_back = 9
look_ahead = 1
# resulting layout: num_samples, num_nodes, num_features, look_back + look_ahead
window_kwargs = dict(num_timesteps_input=look_back, num_timesteps_output=look_ahead)

training_data, train_mask = generate_dataset_concat(train_original_data, train_mask, **window_kwargs)
valid_data, valid_mask = generate_dataset_concat(val_original_data, valid_mask, **window_kwargs)
test_data, test_mask = generate_dataset_concat(test_original_data, test_mask, **window_kwargs)

print(f"shape of training: {training_data.shape}, {train_mask.shape}")
print(f"shape of validation: {valid_data.shape}, {valid_mask.shape}")
print(f"shape of testing: {test_data.shape}, {test_mask.shape}")

shape of training: torch.Size([91, 6163, 26, 10]), torch.Size([91, 6163, 26, 10])
shape of validation: torch.Size([20, 6163, 26, 10]), torch.Size([20, 6163, 26, 10])
shape of testing: torch.Size([6, 6163, 26, 10]), torch.Size([6, 6163, 26, 10])


In [86]:
train_mask[1, :, 0, 9:].shape

torch.Size([6163, 1])

In [87]:
# Bundle the three splits (and their masks) by name for the model.
data = {'train': training_data, 'valid': valid_data, 'test': test_data}

mask = {'train': train_mask, 'valid': valid_mask, 'test': test_mask}
# batch shape: torch.Size([1, 6163, 26, 10])
print(valid_data.shape)
# Feature 0 at time index 9 onwards of validation sample 0:
# the mask, the values, and finally only the values the mask keeps.
last_step_mask = valid_mask[0, :, 0, 9:]
last_step_values = valid_data[0, :, 0, 9:]
print(last_step_mask)
print(last_step_values)

print(last_step_values.masked_select(last_step_mask))

torch.Size([20, 6163, 26, 10])
tensor([[False],
        [False],
        [False],
        ...,
        [False],
        [ True],
        [ True]])
tensor([[ 1.0103],
        [ 1.1195],
        [ 1.1002],
        ...,
        [-1.1023],
        [ 1.1352],
        [-0.8349]], dtype=torch.float64)
tensor([-1.7332e-02, -9.9638e-01, -9.8757e-01, -1.0694e+00, -1.1453e+00,
        -1.0197e+00,  1.1636e+00,  1.1948e+00,  1.1324e+00,  1.1311e+00,
         1.1352e+00,  1.1352e+00,  1.1352e+00, -9.1041e-01, -1.1507e+00,
        -8.4501e-01, -9.9979e-01,  1.0871e+00,  1.1435e+00, -8.6723e-01,
         1.1043e+00,  1.1688e+00,  1.1688e+00, -1.1635e+00, -1.1130e+00,
         1.1810e+00,  1.1810e+00,  1.0910e+00,  1.0407e+00,  5.9843e-01,
        -1.1102e+00,  1.0996e+00,  1.0945e+00, -8.5722e-01, -8.4220e-01,
        -8.6300e-01, -8.6300e-01, -8.6300e-01,  1.1941e+00, -8.3498e-01,
         1.1688e+00, -5.2789e-01, -1.1506e+00, -1.1132e+00,  1.0477e+00,
         1.0477e+00,  1.0213e+00,  1.0213e+00, 

**Now start training**

In [88]:
from test_tube import Experiment 
from pytorch_lightning import Trainer
import os

# PyTorch summarywriter with a few bells and whistles    
# The experiment is saved under the current working directory.
exp = Experiment(save_dir=os.getcwd())

# pass in experiment for automatic tensorboard logging.    
# NOTE(review): max_nb_epochs / train_percent_check are keyword names of an early
# pytorch_lightning API — presumably "at most 30 epochs" and "use the whole
# training set"; confirm against the installed version.
trainer = Trainer(experiment=exp, max_nb_epochs=30, train_percent_check=1)

gpu available: False, used: False


In [91]:
# `imp` is deprecated and no longer ships with Python 3.12+;
# importlib.reload is the supported way to re-import an edited module.
import importlib
import src.tgcn.temporal_spatial_model as l
import src.tgcn.layers.lstmcell as h
# Reload the layer module before the model module so that source edits are
# picked up without restarting the kernel (presumably the model module imports
# the layer module — verify).
h = importlib.reload(h)

l = importlib.reload(l)
TGCN = l.TGCN
# input_dim=26 is the per-node feature width printed for the dataset tensors above.
model = TGCN(input_dim=26, hidden_dim=26, layer_dim=2, output_dim=1, adj=adj, 
             datasets=data, mask=mask)
trainer.fit(model)

 59%|█████▊    | 65/111 [00:39<05:22,  7.01s/it, avg_val_loss=1.51, batch_nb=62, epoch=5, tng_loss=1.619, v_nb=29]

          Name        Type  Params
0      gc_lstm  GCLSTMCell    6318
1  gc_lstm.x2h      Linear    2808
2  gc_lstm.h2h      Linear    2808
3           fc      Linear      27
4      dropout     Dropout       0


 23%|██▎       | 26/111 [00:06<00:22,  3.79it/s, avg_val_loss=0.821, batch_nb=24, epoch=23, tng_loss=0.859, v_nb=29]]

KeyboardInterrupt: 