In [8]:
import sys
import pandas
sys.path.append('../')

from src.graph_utils import partition_graph_by_lonlat
import networkx as nx
from jurbey.jurbey import JURBEY

with open("../data/1558537930325.jurbey", 'rb') as tempf:
    g = JURBEY.load(tempf.read())
print(g.number_of_nodes())
g_partition = partition_graph_by_lonlat(g)
nodes = list(g_partition.nodes())

290100


In [9]:
g_partition.nodes[nodes[100]]

{'data': Node(coord=GeoCoordinates(lon=13.3233121, lat=52.5205769, alt=nan), metadata={})}

In [10]:
A = nx.adjacency_matrix(g_partition)

In [11]:
def most_frequent(List): 
    return max(set(List), key = List.count)

def arc_features(arc):
    arc = g_partition[arc[0]][arc[1]]
    return [
        arc['data'].metadata['highway'],
        arc['data'].metadata.get('surface', 'no_sur'),
        arc['data'].roadClass.name
    ],  [float(arc['data'].metadata.get('maxspeed', '50')), 
        int(arc['data'].metadata.get('lanes', '1'))]

def node_feature(node):
    # successor neighbors
    neigbors = g_partition.neighbors(node)
    highway_f = list()
    surface_f = list()
    roadtype_f = list()
    maxspeed_f = list()
    lanes_f = list()
    for neigbor in neigbors:
        arc = g_partition[node][arc[1]]
        highway_f.append(arc['data'].metadata['highway'])
        surface_f.append(arc['data'].metadata.get('surface', 'no_sur')),
        roadtype_f.append(arc['data'].roadClass.name)
        maxspeed_f.append(float(arc['data'].metadata.get('maxspeed', '50')))
        lane_f.append(arc['data'].metadata.get('lanes', '1'))
    return [most_frequent(highway_f), most_frequent(surface_f),
            most_frequent(roadtype_f)], [most_frequent(maxspeed_f),most_frequent(lanes_f)]

In [15]:
# Our speed data uses segment ids, but the model uses sequential indexes, based on `.nodes()`
import math
id_to_idx = {}
# defaultdict won't do what you expect in Pandas
df = pandas.read_csv("../data/timeseries_speed_april_first_week.csv")
import math
id_to_idx = {}
# defaultdict won't do what you expect in Pandas

for id_ in df["from_node"].unique():
    id_to_idx[id_] = math.nan
for id_ in df["to_node"].unique():
    id_to_idx[id_] = math.nan
    
for idx, id_ in enumerate(g_partition.nodes()):
    id_to_idx[id_] = idx
    
# Let's transform ids to indeces
df["from_node_idx"] = df.replace({"from_node": id_to_idx})["from_node"]
df["to_node_idx"] = df.replace({"to_node": id_to_idx})["to_node"]

from_nodes = df.from_node.values
to_nodes = df.to_node.values
import numpy as np
for edge in g_partition.edges():
    if edge[0] not in from_nodes or edge[1] not in to_nodes:
        a = df.loc[0][0:-4].values
        b = df.loc[0][-4:].values
        df.loc[len(df)] = np.concatenate((a,[edge[0], edge[1]], b[-2:]))

In [16]:
from scipy.sparse import hstack

import torch

def snapshot(t, df=df, g_partition=g_partition):
    df_t = df[[t, "from_node_idx", "to_node_idx"]]
    df_t = df_t.dropna()
    row = df_t["from_node_idx"].tolist()
    col = df_t["to_node_idx"].tolist()
    data = df_t[t].tolist()
    size = len(g_partition.nodes()) 
    feature_size = 2

    return {"indices": (row, col), "values": np.random.rand(len(data), feature_size), "shape": (size, size, feature_size)}

TOTAL_T_STEPS=144
def build_sparse_dataset(from_=0, to=TOTAL_T_STEPS):
    dataset = {"indices": ([], [], []), "values": []}
    for t in range(from_, to):

        snap = snapshot(str(t))
        dataset["indices"][0].extend([t] * len(snap["indices"][0]))
        dataset["indices"][1].extend(snap["indices"][0])
        dataset["indices"][2].extend(snap["indices"][1])
        dataset["values"].extend(snap["values"])

    i = torch.LongTensor(dataset["indices"])
    v = torch.FloatTensor(dataset["values"])
    return torch.sparse.FloatTensor(i, v, torch.Size((to, *snap["shape"])))

dataset = build_sparse_dataset()

In [17]:
dataset.shape

torch.Size([144, 3476, 3476, 2])

In [18]:
len(dataset._values())

69157

In [19]:
nonzero_values_cnt = len(dataset._values())
# what percent goes into training/validation/testing
tng_pct = 0.7
val_pct = 0.1
tst_pct = 1 - tng_pct - val_pct
# now we want to split list of all non-zeros promortionally:
# [0, split1_idx], [split1_idx, split2_idx] and [split2_idx:]
split1_idx = int(nonzero_values_cnt * tng_pct)
split2_idx = -int(nonzero_values_cnt * tst_pct)

In [20]:
from random import shuffle

# but we select indexes randomly
idxs = list(range(nonzero_values_cnt))
shuffle(idxs)
# these are non-zero indexes
tng_idxs = idxs[:split1_idx]
val_idxs = idxs[split1_idx:split2_idx]
tst_inxs = idxs[split2_idx:]

In [21]:
dataset_split = {}
for name, idxs in [('tng', tng_idxs), ('val', val_idxs), ('tst', tst_inxs)]:
    i = torch.LongTensor([
        dataset._indices()[0][idxs].tolist(),
        dataset._indices()[1][idxs].tolist(),
        dataset._indices()[2][idxs].tolist()
    ])
    v = torch.FloatTensor(dataset._values()[idxs])
    # NOTE sparse tensor is not supported yet by the model
    dataset_split[name] = torch.sparse.FloatTensor(i, v, dataset.shape)



In [22]:
from torch.utils.data import DataLoader

def dataloader_from_tensor(t):
        return DataLoader(
            torch.utils.data.TensorDataset(torch.arange(0, len(t)), t),
            shuffle=True,
            batch_size=1,
        )
dl = dataloader_from_tensor(dataset_split['tng'])
for batch_i, data_batch in enumerate(dl):
    print(batch_i)

RuntimeError: sparse tensors do not have strides

In [23]:
dataset_split['tng'] = dataset_split['tng'].to_dense()
dataset_split['val'] = dataset_split['val'].to_dense()
dataset_split['tst'] = dataset_split['tst'].to_dense()

In [24]:
dataset_split['tng'].shape

torch.Size([144, 3476, 3476, 2])

In [32]:
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from test_tube import Experiment
import os
import imp

import src.nmf.lsm_rn_v2 as l
l = imp.reload(l)
LSM_RN = l.LSM_RN

model = LSM_RN(TOTAL_T_STEPS, n=3476, k=50, λ=0.1, adj_mat=A, datasets=dataset_split, batch_size=8, feature_size=2)
exp = Experiment(save_dir=os.getcwd())

# pass in experiment for automatic tensorboard logging.    
trainer = Trainer(experiment=exp, max_nb_epochs=30, train_percent_check=1)  
trainer.fit(model)

gpu available: False, used: False
Empty DataFrame
Columns: [Name, Type, Params]
Index: []
G shape: torch.Size([8, 3476, 3476])
t: tensor([117,  82, 114,  99,  97,  80, 115,   6]), batch_nb: 0
AAA: torch.Size([8, 3476, 2])
AAB: torch.Size([50, 50])
AAC: torch.Size([8, 3476, 50])
AAA: torch.Size([8, 3476, 2])
AAB: torch.Size([50, 50])
AAC: torch.Size([8, 3476, 50])
Ghat shape: torch.Size([8, 3476, 3476])
Ut shape: torch.Size([8, 3476, 50])
G shape: torch.Size([8, 3476, 3476])
t: tensor([  9, 128,  30,  49,  95,  63,  94, 137]), batch_nb: 1
AAA: torch.Size([8, 3476, 2])
AAB: torch.Size([50, 50])
AAC: torch.Size([8, 3476, 50])
AAA: torch.Size([8, 3476, 2])
AAB: torch.Size([50, 50])
AAC: torch.Size([8, 3476, 50])
Ghat shape: torch.Size([8, 3476, 3476])
Ut shape: torch.Size([8, 3476, 50])
G shape: torch.Size([8, 3476, 3476])
t: tensor([ 86,   3,  61,   4,  17, 127,  10,  21]), batch_nb: 2
AAA: torch.Size([8, 3476, 2])
AAB: torch.Size([50, 50])
AAC: torch.Size([8, 3476, 50])
AAA: torch.Size([

  3%|▎         | 1/36 [00:00<00:31,  1.11it/s]

G shape: torch.Size([8, 3476, 3476])
t: tensor([ 12,  42,  83,  51,  76,  53,  92, 132]), batch_nb: 0
AAA: torch.Size([8, 3476, 2])
AAB: torch.Size([50, 50])
AAC: torch.Size([8, 3476, 50])
AAA: torch.Size([8, 3476, 2])
AAB: torch.Size([50, 50])
AAC: torch.Size([8, 3476, 50])
Ghat shape: torch.Size([8, 3476, 3476])
Ut shape: torch.Size([8, 3476, 50])


  6%|▌         | 2/36 [00:11<02:12,  3.89s/it, batch_nb=0, epoch=0, tng_loss=137.412, v_nb=17]

G shape: torch.Size([8, 3476, 3476])
t: tensor([  7,   6,  11, 116, 142,  75,  17, 141]), batch_nb: 1
AAA: torch.Size([8, 3476, 2])
AAB: torch.Size([50, 50])
AAC: torch.Size([8, 3476, 50])
AAA: torch.Size([8, 3476, 2])
AAB: torch.Size([50, 50])
AAC: torch.Size([8, 3476, 50])
Ghat shape: torch.Size([8, 3476, 3476])
Ut shape: torch.Size([8, 3476, 50])


RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.