In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

import import_ipynb
import networks
import DataLoader_PIE
import utils
import time
import numpy as np
from sklearn.metrics import accuracy_score, recall_score

importing Jupyter notebook from networks.ipynb
importing Jupyter notebook from DataLoader_PIE.ipynb
importing Jupyter notebook from utils.ipynb


In [2]:
class args():
    def __init__(self):
        self.base_out_features = 2048
        self.t_obs = 8
        self.batch_size = 20
        self.scene_shape = [3, 144, 256]
        self.base_shape = [2048, 9, 16]
        self.hidden_size = 128
        self.embedding_size = 64
        self.input_size = 2
        self.output_size = 2
        self.sample = True
        self.n_train_sequences = 15000
        self.n_val_sequences = 10
        self.trainOrVal = 'train'
        
        #dataset
        self.jaad_dataset = '../../../../data/smailait-data/jaad/annotations'
        self.dtype = 'train'
        self.from_file = True
        self.file = '/data/smailait-data/train_crossing.csv'
        self.seq_len = 18
        self.predicted_seq_len = 18
        self.crop = 0.3
        self.activity_h = 9
        self.activity_w = 16
        self.truncate = 0
        self.final_frame_offset = 0
        self.loader_workers = 8
        self.loader_shuffle = True
        self.pin_memory = False
        self.image_resize = [240, 426]
        self.image_size = [1080, 1920]
        self.device='cuda'
        
args = args()

In [3]:
class PS_PIE(nn.Module):
    def __init__(self, args):
        super(PS_PIE, self).__init__()

        self.speed_encoder = nn.LSTM(input_size=4, hidden_size=256)
        self.pos_encoder   = nn.LSTM(input_size=4, hidden_size=256)

        self.pos_embedding = nn.Sequential(nn.Linear(in_features=2, out_features=4),
                                           nn.ReLU())

        self.crossing_decoder = nn.LSTMCell(input_size=4, hidden_size=256)

        self.fc_crossing = nn.Sequential(nn.Linear(in_features=256, out_features=2),
                                         nn.ReLU())
        self.softmax = nn.Softmax(dim=1)
        
        self.args = args
        
    def forward(self, speed=None, pos=None):

        _, (hsp, csp) = self.speed_encoder(speed.permute(1,0,2))
        hsp = hsp.squeeze(0)
        csp = csp.squeeze(0)

        _, (hpo, cpo) = self.pos_encoder(pos.permute(1,0,2))
        crossing_outputs = torch.tensor([], device='cuda')
        in_cr = pos[:,-1,:]
        hpo = hpo.squeeze(0)
        cpo = cpo.squeeze(0)
        #hdc = torch.cat((hpo, hsp), dim=1)
        #cdc = torch.cat((cpo, csp), dim=1)
        hdc = hpo+hsp
        cdc = cpo+csp
        for i in range(8):
            hdc, cdc         = self.crossing_decoder(in_cr, (hdc, cdc))
            in_cr            = self.pos_embedding(self.fc_crossing(hdc))
            crossing_output  = self.softmax(self.fc_crossing(hdc))
            crossing_outputs = torch.cat((crossing_outputs, crossing_output.unsqueeze(1)), dim = 1)

        return crossing_outputs

In [4]:
net = PS_PIE(args).to('cuda')

In [5]:
train, val, test = DataLoader_PIE.data_loader(args)

frame
crossing_true
imagefolderpath
scenefolderpath
filename
file
bounding_box
im_size
future_bounding_box
future_crossing
train  loaded
frame
crossing_true
imagefolderpath
scenefolderpath
filename
file
bounding_box
im_size
future_bounding_box
future_crossing
train  loaded
frame
crossing_true
imagefolderpath
scenefolderpath
filename
file
bounding_box
im_size
future_bounding_box
future_crossing
val  loaded


In [6]:
learning_rate  = 0.00001
optimizer     = optim.Adam(net.parameters(), lr=learning_rate)
scheduler      = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=10, threshold = 1e-8, verbose=True)
mse            = nn.MSELoss()
bce            = nn.BCELoss()
train_s_scores = []
train_c_scores = []
val_s_scores   = []
val_c_scores   = []
log            = open('log_PS_PIE.txt', 'w+')

In [7]:
for epoch in range(1, 101):
    start = time.time()

    avg_epoch_train_c_loss = 0
    avg_epoch_val_c_loss   = 0
    avg_acc = 0
    avg_rec = 0
    
    counter = 0
    for idx, (obs_s, _, obs_p, _, target_c) in enumerate(train):
        counter += 1
        obs_s    = obs_s.type(torch.float32).to(device='cuda')
        obs_p    = obs_p.type(torch.float32).to(device='cuda')
        target_c = target_c.type(torch.float32).to(device='cuda')
        
        net.zero_grad()
        crossing_preds = net(speed=obs_s, pos=obs_p)
        crossing_loss = 0
        for i in range(8):
            crossing_loss += bce(crossing_preds[:,i,:], target_c[:,i,:])
        crossing_loss /= 8
        crossing_loss.backward()
        optimizer.step()
        avg_epoch_train_c_loss += float(crossing_loss)

    avg_epoch_train_c_loss/=counter
    train_c_scores.append(avg_epoch_train_c_loss)
    
    counter=0
    for idx, (obs_s, _, obs_p, _, target_c) in enumerate(test):
        counter+=1
        obs_s    = obs_s.type(torch.float32).to(device='cuda')
        obs_p    = obs_p.type(torch.float32).to(device='cuda')
        target_c = target_c.type(torch.float32).to(device='cuda')
        
        with torch.no_grad():
            crossing_preds = net(speed=obs_s, pos=obs_p)
            crossing_loss = 0
            for i in range(8):
                crossing_loss += bce(crossing_preds[:,i,:], target_c[:,i,:])
            crossing_loss /= 8
            avg_epoch_val_c_loss += float(crossing_loss)
            #avg_acc += float(torch.sum(nn.Threshold(0.,0.)(nn.Threshold(-0.5, 1.)(-1*crossing_preds[:,:,0]))==target_c[:,:-1,0]).type(torch.float32)/(20*8))
            avg_rec += recall_score(nn.Threshold(0.,0.)(nn.Threshold(-0.5, 1.)(-1*crossing_preds[:,:,0])).reshape(-1).detach().cpu().numpy(), target_c[:,:-1,0].reshape(-1).cpu().numpy(), average='binary', zero_division=1)
            avg_acc += accuracy_score(nn.Threshold(0.,0.)(nn.Threshold(-0.5, 1.)(-1*crossing_preds[:,:,0])).reshape(-1).detach().cpu().numpy(), target_c[:,:-1,0].reshape(-1).cpu().numpy())
        
    avg_epoch_val_c_loss/=counter
    avg_acc/=counter
    avg_rec/=counter
    val_c_scores.append(avg_epoch_val_c_loss)
    
    scheduler.step(crossing_loss)
    
    print('e:', epoch, ' | tc_l:', avg_epoch_train_c_loss, ' | vc_l:', avg_epoch_val_c_loss,' | acc:', avg_acc,' | rec:', avg_rec,
          '| t:%.4f'%(time.time()-start))


e: 1  | tc_l: 0.5435552312930425  | vc_l: 0.4537753471914603  | acc: 0.7754464285714286  | rec: 0.8464154609562773 | t:23.0591
e: 2  | tc_l: 0.36274482009808223  | vc_l: 0.3894284483121366  | acc: 0.8430484693877549  | rec: 0.7316322475920848 | t:21.9644
e: 3  | tc_l: 0.32376628439625105  | vc_l: 0.3727226610086402  | acc: 0.849170918367347  | rec: 0.7493893440524327 | t:21.5668
e: 4  | tc_l: 0.3039538821180662  | vc_l: 0.3579064245734896  | acc: 0.8531887755102038  | rec: 0.7244736953586774 | t:18.8294
e: 5  | tc_l: 0.28493862058719  | vc_l: 0.3432806999403603  | acc: 0.8554846938775507  | rec: 0.7112354430391106 | t:19.7702
e: 6  | tc_l: 0.26965628296136857  | vc_l: 0.3363522778512264  | acc: 0.8598214285714281  | rec: 0.7147601237306285 | t:18.9706
e: 7  | tc_l: 0.25894700350860755  | vc_l: 0.3288443851835874  | acc: 0.864732142857143  | rec: 0.7432191385996343 | t:21.9631
e: 8  | tc_l: 0.2515563983817895  | vc_l: 0.32659539299047724  | acc: 0.866517857142857  | rec: 0.7416015399571

e: 64  | tc_l: 0.2100987055550019  | vc_l: 0.30287218101474706  | acc: 0.8805484693877553  | rec: 0.761827734375072 | t:21.9834
e: 65  | tc_l: 0.2100969303722183  | vc_l: 0.30124211326545597  | acc: 0.8808673469387756  | rec: 0.7788840555994748 | t:22.2678
e: 66  | tc_l: 0.20992138180633385  | vc_l: 0.30250831974708303  | acc: 0.8805484693877554  | rec: 0.7563526891998288 | t:21.6292
e: 67  | tc_l: 0.21003670470416547  | vc_l: 0.3022219538308528  | acc: 0.8805484693877556  | rec: 0.7625357159215717 | t:22.5915
e: 68  | tc_l: 0.21003542559593916  | vc_l: 0.3008655367457137  | acc: 0.8817602040816327  | rec: 0.7496382740708604 | t:22.0255
e: 69  | tc_l: 0.21009245137373606  | vc_l: 0.30276024562059617  | acc: 0.8805484693877553  | rec: 0.7541740446914785 | t:20.7522
e: 70  | tc_l: 0.20997826381772758  | vc_l: 0.3029889068281164  | acc: 0.8803571428571427  | rec: 0.7649927683804943 | t:21.9994
e: 71  | tc_l: 0.21008977496623993  | vc_l: 0.30176448605346434  | acc: 0.8815051020408167  | re

In [9]:
counter=0
for idx, (obs_s, _, obs_p, _, target_c) in enumerate(test):
    counter+=1
    obs_s    = obs_s.type(torch.float32).to(device='cuda')
    obs_p    = obs_p.type(torch.float32).to(device='cuda')
    target_c = target_c.type(torch.float32).to(device='cuda')

    with torch.no_grad():
        crossing_preds = net(speed=obs_s, pos=obs_p)
        crossing_loss = 0
        for i in range(8):
            crossing_loss += bce(crossing_preds[:,i,:], target_c[:,i,:])
        crossing_loss /= 8
        avg_epoch_val_c_loss += float(crossing_loss)
        avg_rec += recall_score(nn.Threshold(0.,0.)(nn.Threshold(-0.5, 1.)(-1*crossing_preds[:,:,0])).reshape(-1).detach().cpu().numpy(), target_c[:,:-1,0].reshape(-1).cpu().numpy(), average='binary', zero_division=1)
        avg_acc += accuracy_score(nn.Threshold(0.,0.)(nn.Threshold(-0.5, 1.)(-1*crossing_preds[:,:,0])).reshape(-1).detach().cpu().numpy(), target_c[:,:-1,0].reshape(-1).cpu().numpy())
        
avg_epoch_val_c_loss/=counter
avg_acc/=counter
avg_rec/=counter
val_c_scores.append(avg_epoch_val_c_loss)

#scheduler.step(crossing_loss)

print('e:', epoch, ' | tc_l:', avg_epoch_train_c_loss, ' | vc_l:', avg_epoch_val_c_loss,' | acc:', avg_acc,' | rec:', avg_rec,
      '| t:%.4f'%(time.time()-start))


e: 100  | tc_l: 0.21005194469044605  | vc_l: 0.3045199844018424  | acc: 0.8899383972664453  | rec: 0.7619282550614257 | t:19925.3827
