In [170]:
%reload_ext autoreload
%autoreload 2

In [171]:
import sys
sys.path.append('../')

In [172]:
# Number of time steps per arc; the speed columns of the CSV loaded below are named "0" .. "143".
TOTAL_T_STEPS = 144

## Get Jurbey Sub-Graph

In [173]:
from src.graph_utils import partition_graph_by_lonlat
from jurbey.jurbey import JURBEY

# Read the serialized graph as raw bytes and let JURBEY deserialize it.
with open("../data/1556798416403.jurbey", 'rb') as tempf:
    g = JURBEY.load(tempf.read())
# NOTE(review): called with its defaults; presumably cuts out a lon/lat bounding box of the
# full graph -- confirm the default region in src.graph_utils.
g_partition = partition_graph_by_lonlat(g)

## Build a dataframe with all time and static features

In [174]:
import pandas

In [175]:
# Load the per-arc speed time series and discard the "Unnamed: 0" column present in the CSV.
df = (
    pandas.read_csv("../data/timeseries_speed_april_first_week.csv")
    .drop(columns=["Unnamed: 0"])
)

In [176]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,136,137,138,139,140,141,142,143,from_node,to_node
0,,,,10.854457,,5.680978,7.036838,1.499997,10.088634,10.587459,...,10.581123,,,,,,10.850843,10.036408,628154368,1023689595
1,,,,10.854457,,5.680978,7.036838,1.499997,10.088634,10.587459,...,10.581123,,,,,,10.850843,10.036408,628154368,1023689595
2,,,,,,10.48621,,,10.937218,,...,,,,,,,,,527147009,27537239
3,,,,,,,,,,,...,,,,,,,,,527147009,26908815
4,,,,10.468811,10.636621,10.288534,10.617513,1.758539,3.404401,10.224568,...,6.098767,10.257657,10.586166,,10.669476,4.313417,10.392901,10.51831,628154370,3804638178


### Let's add more columns for static features

In [177]:
import math
def get_static_features(row):
    """Collect the static road attributes of the arc described by a dataframe row.

    The arc is looked up in the notebook-level ``g_partition`` graph using the
    row's ``from_node`` and ``to_node`` values.

    Returns:
        A 5-tuple ``(highway, surface, road_class_name, maxspeed, lanes)``.
        ``highway`` is indexed directly, so it must be present in the arc
        metadata; the other metadata values fall back to ``None`` (surface),
        ``math.nan`` (maxspeed) and ``'1'`` (lanes) when the tag is absent.
    """
    arc_data = g_partition[row['from_node']][row['to_node']]['data']
    tags = arc_data.metadata

    highway = tags['highway']
    surface = tags.get('surface', None)
    road_class = arc_data.roadClass.name
    maxspeed = tags.get('maxspeed', math.nan)
    lanes = tags.get('lanes', '1')
    return highway, surface, road_class, maxspeed, lanes

In [178]:
# get_static_features returns one 5-tuple per row; zip(*...) transposes them into five columns.
# NOTE: the "lines" column holds the arc's 'lanes' tag (last element returned by
# get_static_features). The name is kept as-is because the 'lines_*' dummy columns used
# later in this notebook are derived from it.
df["highway"], df["surface"], df["roadClass"], df["maxspeed"], df["lines"] = zip(*df.apply(get_static_features, axis=1))

In [179]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,141,142,143,from_node,to_node,highway,surface,roadClass,maxspeed,lines
0,,,,10.854457,,5.680978,7.036838,1.499997,10.088634,10.587459,...,,10.850843,10.036408,628154368,1023689595,primary,asphalt,MajorRoad,50,4
1,,,,10.854457,,5.680978,7.036838,1.499997,10.088634,10.587459,...,,10.850843,10.036408,628154368,1023689595,primary,asphalt,MajorRoad,50,4
2,,,,,,10.48621,,,10.937218,,...,,,,527147009,27537239,tertiary,asphalt,LocalRoad,50,1
3,,,,,,,,,,,...,,,,527147009,26908815,tertiary,asphalt,LocalRoad,50,1
4,,,,10.468811,10.636621,10.288534,10.617513,1.758539,3.404401,10.224568,...,4.313417,10.392901,10.51831,628154370,3804638178,primary,asphalt,MajorRoad,50,4


In [180]:
# One-hot encode the categorical attributes.
# surface / maxspeed may be missing (None / NaN defaults in get_static_features), so they also
# get an explicit "<column>_nan" indicator via dummy_na=True.
df_dummies = pandas.get_dummies(df, columns=["surface", "maxspeed"], dummy_na=True)
# highway, roadClass and lines always carry a value, so no NaN indicator is requested for them.
df_dummies = pandas.get_dummies(df_dummies, columns=["highway", "roadClass", "lines"], dummy_na=False)

In [181]:
df_dummies.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,highway_tertiary_link,highway_unclassified,roadClass_DirtRoad,roadClass_LocalRoad,roadClass_MajorRoad,lines_1,lines_2,lines_3,lines_4,lines_5
0,,,,10.854457,,5.680978,7.036838,1.499997,10.088634,10.587459,...,0,0,0,0,1,0,0,0,1,0
1,,,,10.854457,,5.680978,7.036838,1.499997,10.088634,10.587459,...,0,0,0,0,1,0,0,0,1,0
2,,,,,,10.48621,,,10.937218,,...,0,0,0,1,0,1,0,0,0,0
3,,,,,,,,,,,...,0,0,0,1,0,1,0,0,0,0
4,,,,10.468811,10.636621,10.288534,10.617513,1.758539,3.404401,10.224568,...,0,0,0,0,1,0,0,0,1,0


In [182]:
# Remove rows that are exact duplicates across all columns (e.g. rows 0 and 1 in the previews).
# The original index labels are kept, so the index is no longer contiguous afterwards.
df_unique = df_dummies.drop_duplicates()

In [183]:
df_unique.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,highway_tertiary_link,highway_unclassified,roadClass_DirtRoad,roadClass_LocalRoad,roadClass_MajorRoad,lines_1,lines_2,lines_3,lines_4,lines_5
0,,,,10.854457,,5.680978,7.036838,1.499997,10.088634,10.587459,...,0,0,0,0,1,0,0,0,1,0
2,,,,,,10.48621,,,10.937218,,...,0,0,0,1,0,1,0,0,0,0
3,,,,,,,,,,,...,0,0,0,1,0,1,0,0,0,0
4,,,,10.468811,10.636621,10.288534,10.617513,1.758539,3.404401,10.224568,...,0,0,0,0,1,0,0,0,1,0
5,,,,,,,10.286999,,,10.83357,...,0,0,0,0,1,0,0,1,0,0


In [184]:
# Alias (not a copy) under the name the remaining cells use.
df_preprocessed = df_unique

## Let's now build an adjacency matrix that matches the row order of our dataframe

In [185]:
import networkx as nx
import scipy.sparse as sp
import numpy as np
import torch

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse COO tensor of float32 values.

    Args:
        sparse_mx: any scipy sparse matrix; it is converted to COO format and
            its values are cast to ``float32``.

    Returns:
        A torch sparse COO tensor with the same shape and non-zero entries.
    """
    coo = sparse_mx.tocoo().astype(np.float32)
    # torch expects the indices as a (2, nnz) int64 tensor: first row = row ids, second = column ids.
    indices = torch.from_numpy(np.vstack((coo.row, coo.col)).astype(np.int64))
    values = torch.from_numpy(coo.data)
    # torch.sparse.FloatTensor(indices, values, shape) is a deprecated legacy constructor;
    # torch.sparse_coo_tensor is the supported factory and infers float32 from `values`.
    return torch.sparse_coo_tensor(indices, values, torch.Size(coo.shape))
                                    
def normalize(mx):
    """Row-normalize a sparse matrix so that every non-empty row sums to 1.

    Each row is multiplied by the inverse of its sum. Rows whose sum is zero
    are left as all zeros instead of producing inf/NaN.

    Args:
        mx: scipy sparse matrix (integer or floating dtype).

    Returns:
        The row-normalized matrix, computed as ``diag(1 / rowsum) @ mx``.
    """
    rowsum = np.asarray(mx.sum(1)).flatten()
    # np.power(int_array, -1) raises "Integers to negative integer powers are not allowed",
    # so integer (or boolean) sums are promoted to float before being inverted.
    if not np.issubdtype(rowsum.dtype, np.inexact):
        rowsum = rowsum.astype(np.float64)
    # Invert only the non-zero sums; this avoids the divide-by-zero warning and the
    # inf -> 0 patch-up that a blanket inversion would need.
    r_inv = np.zeros_like(rowsum)
    nonzero = rowsum != 0
    r_inv[nonzero] = 1.0 / rowsum[nonzero]
    return sp.diags(r_inv).dot(mx)

In [186]:
# Line graph of the road network: every arc (from_node, to_node) becomes a node, and two arcs
# are linked when the first one ends where the second one starts.
L = nx.line_graph(nx.DiGraph(g_partition))

# Arcs as (from_node, to_node) tuples in dataframe row order; used to order the adjacency matrix.
nodelist = [tuple(x) for x in df_preprocessed[['from_node','to_node']].values]

In [187]:
# Adjacency of the line graph, with rows/columns ordered like the dataframe rows (via nodelist).
adj = nx.to_scipy_sparse_matrix(L, format="coo", nodelist=nodelist)
# build symmetric adjacency matrix
# (wherever the transposed entry is larger, it replaces the entry: element-wise max(adj, adj.T))
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

# Add self-loops (identity), row-normalize, then convert to a torch sparse tensor.
adj = normalize(adj + sp.eye(adj.shape[0]))
adj = sparse_mx_to_torch_sparse_tensor(adj)

### Now let's build the time-series dataset

In [188]:
# One-hot columns used as static (time-independent) features of each arc.
# The order of this list fixes the feature order of static_df and of the static tensor built
# from it. The names are the dummy columns created by pandas.get_dummies above
# ("lines_*" are the lane-count dummies).
static_features = ['highway_access_ramp',
 'highway_corridor',
 'highway_living_street',
 'highway_platform',
 'highway_primary',
 'highway_residential',
 'highway_secondary',
 'highway_secondary_link',
 'highway_service',
 'highway_tertiary',
 'highway_tertiary_link',
 'highway_unclassified',
 'surface_asphalt',
 'surface_cobblestone',
 'surface_cobblestone:flattened',
 'surface_concrete',
 'surface_concrete:plates',
 'surface_grass_paver',
 'surface_paved',
 'surface_paving_stones',
 'surface_sett',
 'surface_nan',
 'roadClass_DirtRoad',
 'roadClass_LocalRoad',
 'roadClass_MajorRoad',
 'maxspeed_10',
 'maxspeed_20',
 'maxspeed_30',
 'maxspeed_5',
 'maxspeed_50',
 'maxspeed_nan',
 'lines_1',
 'lines_2',
 'lines_3',
 'lines_4',
 'lines_5'
]

In [189]:
len(static_features)

36

In [190]:
# Names of the speed columns: the time-step indices 0 .. TOTAL_T_STEPS - 1 as strings.
SPEED_COLUMNS = [str(step) for step in range(TOTAL_T_STEPS)]

In [191]:
# True where a value is present. Taken from the un-imputed frame, so for the speed columns it
# marks which speeds were actually observed (missing speeds are filled in only afterwards).
mask_df = df_preprocessed.notna()
# Static one-hot features, in the column order given by static_features.
static_df = df_preprocessed[static_features]

In [192]:
# Speed preprocessing
from sklearn.preprocessing import FunctionTransformer, RobustScaler
from sklearn.pipeline import Pipeline
import pandas as pd

speed_df = df_preprocessed[SPEED_COLUMNS]
# Impute missing speeds with the mean of their time step (column-wise mean).
speed_df = speed_df.fillna(speed_df.mean())
speed_pipeline = Pipeline([
    # Disabled log step. NOTE(review): func and inverse_func are both np.expm1 here; if this
    # is ever re-enabled, func presumably has to be np.log1p -- confirm.
    #('log', FunctionTransformer(func=np.expm1, inverse_func=np.expm1, validate=True))
    ('scaler', RobustScaler())
])
# A single scaler is shared by all time steps, so every speed is treated as one sample of a
# single feature: the pipeline is fitted on an (n_values, 1) column.
speed_values = speed_df.values.reshape(-1, 1)
speed_pipeline.fit(speed_values)
# Transform the same single-column view and reshape back. Passing the 144-column matrix to a
# scaler fitted on one feature only works where scikit-learn does not validate the feature
# count; releases that do validate it reject the call.
speed_df = pd.DataFrame(
    speed_pipeline.transform(speed_values).reshape(speed_df.shape),
    index=speed_df.index,
    columns=speed_df.columns,
)

In [193]:
mask_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,highway_tertiary_link,highway_unclassified,roadClass_DirtRoad,roadClass_LocalRoad,roadClass_MajorRoad,lines_1,lines_2,lines_3,lines_4,lines_5
0,False,False,False,True,False,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2,False,False,False,False,False,True,False,False,True,False,...,True,True,True,True,True,True,True,True,True,True
3,False,False,False,False,False,False,False,False,False,False,...,True,True,True,True,True,True,True,True,True,True
4,False,False,False,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
5,False,False,False,False,False,False,True,False,False,True,...,True,True,True,True,True,True,True,True,True,True


In [194]:
import torch
# Number of consecutive time steps in each sliding window.
WINDOW = 10

def build_sliding_speed_dataset(speed_df, mask_df, window=WINDOW):
    """Slice the speed and mask frames into overlapping windows of time steps.

    One window is produced for every end index from ``window`` up to and
    including ``TOTAL_T_STEPS``; a window ending at ``end`` covers the columns
    named ``str(end - window)`` .. ``str(end - 1)``.

    Args:
        speed_df: frame holding the speed columns.
        mask_df: frame holding the matching mask columns.
        window: number of time steps per window.

    Returns:
        ``(speed, mask)``: two stacked tensors of shape
        ``(number of windows, number of rows, window)``.
    """
    window_columns = (
        [str(step) for step in range(end - window, end)]
        for end in range(window, TOTAL_T_STEPS + 1)
    )
    # Build the speed/mask slice of each window together, so both always use the same columns.
    pairs = [
        (torch.Tensor(speed_df[cols].values), torch.Tensor(mask_df[cols].values))
        for cols in window_columns
    ]
    speed_windows = [speed_part for speed_part, _ in pairs]
    mask_windows = [mask_part for _, mask_part in pairs]
    return torch.stack(speed_windows), torch.stack(mask_windows)

In [195]:
speed, mask = build_sliding_speed_dataset(speed_df, mask_df)
# Append a trailing feature axis of size 1: (windows, arcs, window length) -> (..., 1).
speed_seq = speed.unsqueeze(3)
mask_seq = mask.unsqueeze(3)
print(mask_seq.shape)
print(speed_seq.shape)

torch.Size([135, 6157, 10, 1])
torch.Size([135, 6157, 10, 1])


In [196]:
# Shift the 0/1 one-hot indicators to -0.5/+0.5.
static = torch.Tensor(static_df.values) - 0.5
# Insert singleton window and time axes, then broadcast (expand allocates no new memory) the
# per-arc features across every window and every time step of speed_seq.
static_seq = (
    static
    .unsqueeze(0)
    .unsqueeze(2)
    .expand(speed_seq.shape[0], -1, speed_seq.shape[2], -1)
)
print(static_seq.shape)

torch.Size([135, 6157, 10, 36])


In [197]:
# Each dataset item is one sliding window: its (speed, mask, static) slices along the first axis.
ts_dataset = torch.utils.data.TensorDataset(speed_seq, mask_seq, static_seq)

In [198]:
# Dense copy of the normalized adjacency; this is what gets passed to the model below.
adj_dense = adj.to_dense()

In [205]:
from src.nmf.lsm_rn import LSM_RN
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from test_tube import Experiment
from src.gcn_lstm.gcn_lstm_model import GCNLSTMModel 

# Width of the first model argument, derived from the tensors instead of a hard-coded 38:
# one speed value + one observed/missing flag + the static one-hot features (1 + 1 + 36 = 38).
N_INPUT_FEATURES = speed_seq.shape[-1] + mask_seq.shape[-1] + static_seq.shape[-1]

# NOTE(review): 6 and 3 look like the hidden size and the number of stacked cells (the model
# summary lists conv_h* layers with 36 = 6 * 6 parameters and cells cell0..cell2) -- confirm
# against GCNLSTMModel's signature.
model = GCNLSTMModel(N_INPUT_FEATURES, 6, 3, adj_dense, ts_dataset, speed_transform=speed_pipeline, timesteps=WINDOW - 1, batch_size=32)
exp = Experiment(save_dir='gcnlstm_logs')
# Keep only the checkpoint with the lowest monitored 'avg_val_mae'.
checkpoint_callback = ModelCheckpoint(
    filepath='gcnlstm.ckpt',
    save_best_only=True,
    verbose=True,
    monitor='avg_val_mae',
    mode='min'
)

# most basic trainer, uses good defaults
trainer = Trainer(experiment=exp, checkpoint_callback=checkpoint_callback)
#trainer = Trainer(experiment=exp)
trainer.fit(model)
#TODO lr decay

[autoreload of src.gcn_lstm.gcn_lstm_model failed: Traceback (most recent call last):
  File "/Users/dscsade/.local/share/virtualenvs/speed-imputation-duMcSXZy/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "/Users/dscsade/.local/share/virtualenvs/speed-imputation-duMcSXZy/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 450, in superreload
    update_generic(old_obj, new_obj)
  File "/Users/dscsade/.local/share/virtualenvs/speed-imputation-duMcSXZy/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 387, in update_generic
    update(a, b)
  File "/Users/dscsade/.local/share/virtualenvs/speed-imputation-duMcSXZy/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 357, in update_class
    update_instances(old, new)
  File "/Users/dscsade/.local/share/virtualenvs/speed-imputation-duMcSXZy/lib/python3.6/site-packages/IPython/extensions/autoreload.py", l

gpu available: False, used: False
                  Name          Type  Params
0                 lstm      GCN_LSTM    1704
1           lstm.cell0  GCN_LSTMCell    1080
2   lstm.cell0.conv_xf     GraphConv     234
3   lstm.cell0.conv_hf     GraphConv      36
4   lstm.cell0.conv_xi     GraphConv     234
5   lstm.cell0.conv_hi     GraphConv      36
6   lstm.cell0.conv_xo     GraphConv     234
7   lstm.cell0.conv_ho     GraphConv      36
8   lstm.cell0.conv_xc     GraphConv     234
9   lstm.cell0.conv_hc     GraphConv      36
10          lstm.cell1  GCN_LSTMCell     312
11  lstm.cell1.conv_xf     GraphConv      42
12  lstm.cell1.conv_hf     GraphConv      36
13  lstm.cell1.conv_xi     GraphConv      42
14  lstm.cell1.conv_hi     GraphConv      36
15  lstm.cell1.conv_xo     GraphConv      42
16  lstm.cell1.conv_ho     GraphConv      36
17  lstm.cell1.conv_xc     GraphConv      42
18  lstm.cell1.conv_hc     GraphConv      36
19          lstm.cell2  GCN_LSTMCell     312
20  lstm.cell2.conv_x

ValueError: operands could not be broadcast together with shapes (28,6157) (172396,1) 