In [None]:
%reload_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append('../')

In [3]:
TOTAL_T_STEPS = 144

## Get Jurbey Sub-Graph

In [4]:
from src.graph_utils import partition_graph_by_lonlat
from jurbey.jurbey import JURBEY

with open("../data/1556798416403.jurbey", 'rb') as tempf:
    g = JURBEY.load(tempf.read())
g_partition = partition_graph_by_lonlat(g)

## Build a dataframe with all time and static features

In [5]:
import pandas

In [6]:
df = pandas.read_csv("../data/timeseries_speed_april_first_week.csv")
df = df.drop(columns=["Unnamed: 0"])

In [7]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,136,137,138,139,140,141,142,143,from_node,to_node
0,,,,28.817616,29.111668,8.288389,6.779508,10.833259,9.54078,7.870202,...,9.136869,11.530145,8.263133,21.063414,6.517024,8.931566,9.542779,7.236827,628154368,1023689595
1,,,,28.817616,29.111668,8.288389,6.779508,10.833259,9.54078,7.870202,...,9.136869,11.530145,8.263133,21.063414,6.517024,8.931566,9.542779,7.236827,628154368,1023689595
2,,,18.285511,,,10.48621,16.631804,11.133786,7.663071,14.41779,...,,,,,,,,,527147009,27537239
3,,,,,19.033088,,,4.873969,12.137977,9.498607,...,,,,12.890066,,,13.178754,,527147009,26908815
4,,,10.952773,20.377332,9.472034,7.588203,9.702233,6.167263,5.860414,7.97867,...,6.705081,9.68438,29.012508,14.253002,8.932515,6.7644,10.154796,10.82118,628154370,3804638178


### Let's add more columns for static features

In [8]:
import math
def get_static_features(row):
    arc = g_partition[row['from_node']][row['to_node']]
    return (
        arc['data'].metadata['highway'],
        arc['data'].metadata.get('surface', None),
        arc['data'].roadClass.name,
        arc['data'].metadata.get('maxspeed', math.nan),
        arc['data'].metadata.get('lanes', '1')
    )

In [9]:
df["highway"], df["surface"], df["roadClass"], df["maxspeed"], df["lines"] = zip(*df.apply(get_static_features, axis=1))

In [10]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,141,142,143,from_node,to_node,highway,surface,roadClass,maxspeed,lines
0,,,,28.817616,29.111668,8.288389,6.779508,10.833259,9.54078,7.870202,...,8.931566,9.542779,7.236827,628154368,1023689595,primary,asphalt,MajorRoad,50,4
1,,,,28.817616,29.111668,8.288389,6.779508,10.833259,9.54078,7.870202,...,8.931566,9.542779,7.236827,628154368,1023689595,primary,asphalt,MajorRoad,50,4
2,,,18.285511,,,10.48621,16.631804,11.133786,7.663071,14.41779,...,,,,527147009,27537239,tertiary,asphalt,LocalRoad,50,1
3,,,,,19.033088,,,4.873969,12.137977,9.498607,...,,13.178754,,527147009,26908815,tertiary,asphalt,LocalRoad,50,1
4,,,10.952773,20.377332,9.472034,7.588203,9.702233,6.167263,5.860414,7.97867,...,6.7644,10.154796,10.82118,628154370,3804638178,primary,asphalt,MajorRoad,50,4


In [11]:
df_dummies = df_dummies = pandas.get_dummies(df, columns=["highway", "surface", "roadClass", "maxspeed", "lines"], dummy_na=True)

In [12]:
df_dummies.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,maxspeed_30,maxspeed_5,maxspeed_50,maxspeed_nan,lines_1,lines_2,lines_3,lines_4,lines_5,lines_nan
0,,,,28.817616,29.111668,8.288389,6.779508,10.833259,9.54078,7.870202,...,0,0,1,0,0,0,0,1,0,0
1,,,,28.817616,29.111668,8.288389,6.779508,10.833259,9.54078,7.870202,...,0,0,1,0,0,0,0,1,0,0
2,,,18.285511,,,10.48621,16.631804,11.133786,7.663071,14.41779,...,0,0,1,0,1,0,0,0,0,0
3,,,,,19.033088,,,4.873969,12.137977,9.498607,...,0,0,1,0,1,0,0,0,0,0
4,,,10.952773,20.377332,9.472034,7.588203,9.702233,6.167263,5.860414,7.97867,...,0,0,1,0,0,0,0,1,0,0


In [13]:
df_unique = df_dummies.drop_duplicates()

## Let's now make an adjecancy matrix, that matches the order in our dataframe

In [14]:
import networkx as nx
import scipy.sparse as sp
import numpy as np
import torch

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor."""
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse.FloatTensor(indices, values, shape)
                                    
def normalize(mx):
    """Row-normalize sparse matrix"""
    rowsum = np.array(mx.sum(1))
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    mx = r_mat_inv.dot(mx)
    return mx

In [15]:
L = nx.line_graph(nx.DiGraph(g_partition))

nodelist = [tuple(x) for x in df_unique[['from_node','to_node']].values]

In [16]:
adj = nx.to_scipy_sparse_matrix(L, format="coo", nodelist=nodelist)
# build symmetric adjacency matrix
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

adj = normalize(adj + sp.eye(adj.shape[0]))
adj = sparse_mx_to_torch_sparse_tensor(adj)

### Now let's build time-series dataset

In [17]:
static_features = ['highway_access_ramp',
 'highway_corridor',
 'highway_living_street',
 'highway_platform',
 'highway_primary',
 'highway_residential',
 'highway_secondary',
 'highway_secondary_link',
 'highway_service',
 'highway_tertiary',
 'highway_tertiary_link',
 'highway_unclassified',
 'highway_nan',
 'surface_asphalt',
 'surface_cobblestone',
 'surface_cobblestone:flattened',
 'surface_concrete',
 'surface_concrete:plates',
 'surface_grass_paver',
 'surface_paved',
 'surface_paving_stones',
 'surface_sett',
 'surface_nan',
 'roadClass_DirtRoad',
 'roadClass_LocalRoad',
 'roadClass_MajorRoad',
 'roadClass_nan',
 'maxspeed_10',
 'maxspeed_20',
 'maxspeed_30',
 'maxspeed_5',
 'maxspeed_50',
 'maxspeed_nan',
 'lines_1',
 'lines_2',
 'lines_3',
 'lines_4',
 'lines_5',
 'lines_nan']

In [18]:
len(static_features)

39

In [19]:
mask_df = df_unique.notna()
static_df = df_unique[static_features]
speed_df = df_unique[map(str, list(range(TOTAL_T_STEPS)))]

In [20]:
mask_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,maxspeed_30,maxspeed_5,maxspeed_50,maxspeed_nan,lines_1,lines_2,lines_3,lines_4,lines_5,lines_nan
0,False,False,False,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2,False,False,True,False,False,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
3,False,False,False,False,True,False,False,True,True,True,...,True,True,True,True,True,True,True,True,True,True
4,False,False,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
5,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True


In [21]:
import torch

def build_sliding_speed_dataset(speed_df, mask_df, window=10):
    speed = []
    mask = []
    speed_df = speed_df.fillna(speed_df.mean())
    for i in range(window, TOTAL_T_STEPS + 1):
        columns = list(map(str, range(i - window, i)))
        speed.append(torch.Tensor(speed_df[columns].values))
        mask.append(torch.Tensor(mask_df[columns].values))
        
    return torch.stack(speed), torch.stack(mask)

In [22]:
speed, mask = build_sliding_speed_dataset(speed_df, mask_df)
speed_seq = speed.unsqueeze(3)
mask_seq = speed.unsqueeze(3)
print(mask_seq.shape)
print(speed_seq.shape)

torch.Size([135, 6157, 10, 1])
torch.Size([135, 6157, 10, 1])


In [23]:
static = torch.Tensor(static_df.values)
static_seq = static.unsqueeze(0)
static_seq = static_seq.unsqueeze(2)
static_seq = static_seq.expand([speed_seq.shape[0], -1, speed_seq.shape[2], -1])
print(static_seq.shape)

torch.Size([135, 6157, 10, 39])


In [24]:
ts_dataset = torch.utils.data.TensorDataset(speed_seq, mask_seq, static_seq)

In [25]:
adj_dense = adj.to_dense()

In [None]:
from src.nmf.lsm_rn import LSM_RN
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from test_tube import Experiment
from src.gcn_lstm.gcn_lstm_model import GCNLSTMModel 

model = GCNLSTMModel(41, 50, 2, adj_dense, ts_dataset, batch_size=8)
exp = Experiment(save_dir='gcnlstm_logs')
checkpoint_callback = ModelCheckpoint(
    filepath='gcnlstm.ckpt',
    save_best_only=True,
    verbose=True,
    monitor='avg_val_mae',
    mode='min'
)

# most basic trainer, uses good defaults
trainer = Trainer(experiment=exp, checkpoint_callback=checkpoint_callback)    
trainer.fit(model)
#TODO lr decay

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


gpu available: False, used: False
                  Name          Type  Params
0                 lstm      GCN_LSTM   38600
1           lstm.cell0  GCN_LSTMCell   18400
2   lstm.cell0.conv_xf     GraphConv    2100
3   lstm.cell0.conv_hf     GraphConv    2500
4   lstm.cell0.conv_xi     GraphConv    2100
5   lstm.cell0.conv_hi     GraphConv    2500
6   lstm.cell0.conv_xo     GraphConv    2100
7   lstm.cell0.conv_ho     GraphConv    2500
8   lstm.cell0.conv_xc     GraphConv    2100
9   lstm.cell0.conv_hc     GraphConv    2500
10          lstm.cell1  GCN_LSTMCell   20200
11  lstm.cell1.conv_xf     GraphConv    2550
12  lstm.cell1.conv_hf     GraphConv    2500
13  lstm.cell1.conv_xi     GraphConv    2550
14  lstm.cell1.conv_hi     GraphConv    2500
15  lstm.cell1.conv_xo     GraphConv    2550
16  lstm.cell1.conv_ho     GraphConv    2500
17  lstm.cell1.conv_xc     GraphConv    2550
18  lstm.cell1.conv_hc     GraphConv    2500
19             decoder        Conv2d     451


 57%|█████▋    | 8/14 [04:30<03:33, 35.51s/it, batch_nb=6, epoch=0, tng_loss=362799.020, v_nb=54]