In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

import import_ipynb
import networks
import DataLoader_PS_speed
import utils
import time
import numpy as np

In [None]:
class Args():
    """Plain container for the notebook's configuration values.

    The instance is handed to ``PS_LSTM`` and to
    ``DataLoader_PS_speed.data_loader``. The class is deliberately named
    differently from the ``args`` instance created below: binding both to the
    same name made this cell fail on a second run, because ``args()`` would
    then try to call the instance.
    """

    def __init__(self):
        # Model / sequence dimensions.
        self.base_out_features = 2048
        self.t_obs = 8
        self.batch_size = 4
        self.scene_shape = [3, 144, 256]
        self.base_shape = [2048, 9, 16]
        self.hidden_size = 128
        self.embedding_size = 64
        self.input_size = 2
        self.output_size = 2
        self.sample = True
        self.n_train_sequences = 12000
        self.n_val_sequences = 3000
        self.trainOrVal = 'train'

        # Dataset and loader options.
        # NOTE(review): presumably consumed by DataLoader_PS_speed.data_loader;
        # the loader source is not visible from this notebook - confirm there.
        self.jaad_dataset = '../../../../data/smailait-data/jaad/annotations'
        self.dtype = 'train'
        self.from_file = True
        self.file = '/data/smailait-data/train_speed.csv'
        self.seq_len = 18
        self.predicted_seq_len = 18
        self.crop = 0.3
        self.activity_h = 9
        self.activity_w = 16
        self.truncate = 0
        self.final_frame_offset = 0
        self.loader_workers = 8
        self.loader_shuffle = True
        self.pin_memory = False
        self.image_resize = [240, 426]
        self.image_size = [1080, 1920]
        self.device = 'cuda'


# Shared configuration instance used by the rest of the notebook.
args = Args()

In [None]:
class PS_LSTM(nn.Module):
    """Sequence-to-sequence LSTM driven by scene, speed and position encoders.

    A truncated ResNet-50 (plus a 1x1 convolution) embeds every frame; three
    single-layer LSTM encoders summarise the frame features, the speed
    sequence and the position sequence. Their final hidden and cell states are
    summed and used to initialise an ``LSTMCell`` decoder, which is unrolled
    for ``N_PRED_STEPS`` steps.
    """

    # Number of decoder steps, i.e. the length of the returned sequence.
    N_PRED_STEPS = 8

    def __init__(self, args):
        """Create the sub-networks.

        Args:
            args: configuration object. It is stored on ``self.args`` but is
                not read by ``forward``.
        """
        super(PS_LSTM, self).__init__()

        # ResNet-50 with its last two child modules dropped (avgpool and fc in
        # torchvision's ResNet), followed by a 1x1 conv reducing 2048 -> 512
        # channels. The nesting Sequential(Sequential(...), Conv2d) is kept
        # exactly as before so existing state_dict keys still match.
        backbone = torchvision.models.resnet50(pretrained=True)
        backbone = nn.Sequential(*list(backbone.children())[:-2])
        self.basenet = nn.Sequential(
            backbone,
            nn.Conv2d(in_channels=2048, out_channels=512, kernel_size=1, stride=1),
        )

        self.speed_encoder = nn.LSTM(input_size=4, hidden_size=128)

        self.pos_encoder = nn.LSTM(input_size=4, hidden_size=128)

        # The input size fixes the flattened per-frame feature map at
        # 512 * 8 * 14 values.
        # NOTE(review): this implies one specific input resolution - confirm
        # it matches what the data loader produces.
        self.scene_encoder = nn.LSTM(input_size=512*8*14, hidden_size=128)

        self.decoder = nn.LSTMCell(input_size=4, hidden_size=128)

        self.fc = nn.Linear(in_features=128, out_features=4)

        # Clamps every predicted value to [-100, 100].
        self.activation = nn.Hardtanh(min_val=-100, max_val=100)

        self.args = args

    def forward(self, scenes, speed, pos):
        """Predict ``N_PRED_STEPS`` four-dimensional outputs.

        Args:
            scenes: 5-D tensor indexed as (batch, time, channels, height, width).
            speed: tensor of shape (batch, time, 4).
            pos: tensor of shape (batch, time, 4); its last time step is the
                first decoder input.

        Returns:
            Tensor of shape (batch, N_PRED_STEPS, 4), on the same device as
            the inputs.
        """
        # Take the sizes from the input itself rather than from a global
        # config, so a smaller final batch works too.
        batch_size, n_frames = scenes.shape[0], scenes.shape[1]

        # Fold time into the batch dimension so the CNN sees 4-D input.
        frame_feats = self.basenet(
            scenes.view(batch_size * n_frames, scenes.shape[2], scenes.shape[3], scenes.shape[4])
        )

        # The encoders are sequence-first (batch_first is not set), hence the
        # permute to (time, batch, features).
        _, (hs, cs) = self.scene_encoder(
            frame_feats.view(batch_size, n_frames, -1).permute(1, 0, 2)
        )

        _, (hsp, csp) = self.speed_encoder(speed.permute(1, 0, 2))

        _, (hpo, cpo) = self.pos_encoder(pos.permute(1, 0, 2))

        # Fuse the three encodings by summation and drop the num_layers axis
        # to obtain the (batch, hidden) states LSTMCell expects.
        hd = (hs + hsp + hpo).squeeze(0)
        cd = (cs + csp + cpo).squeeze(0)

        in_dec = pos[:, -1, :]

        steps = []
        for _ in range(self.N_PRED_STEPS):
            hd, cd = self.decoder(in_dec, (hd, cd))
            output = self.activation(self.fc(hd))
            steps.append(output)
            # The next input is the current prediction, detached so gradients
            # do not flow through the fed-back value.
            in_dec = output.detach()

        return torch.stack(steps, dim=1)

In [None]:
# Instantiate the model and move it to the device named in the shared config,
# rather than repeating the device string here.
net = PS_LSTM(args).to(args.device)

In [None]:
#net.load_state_dict(torch.load('/data/smailait-data/models/PS-LSTM-speed/PS_LSTM_speed_dropout0.1_s2s_61epochs.pkl'))
#net = net.train()

In [None]:
# Build the three data loaders from the shared configuration object.
# The loops further down iterate each of them as 5-tuples
# (scenes, obs, target, obs_p, target_p).
train, val, test = DataLoader_PS_speed.data_loader(args)

In [None]:
# Optimisation setup: MSE objective, Adam over all model parameters, and a
# plateau scheduler that multiplies the learning rate by 0.1 after 10
# scheduler steps without sufficient improvement.
criterion = nn.MSELoss()

learning_rate = 0.0001
optimizer = optim.Adam(net.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    factor=0.1,
    patience=10,
    threshold=1e-4,
    verbose=True,
)

# Per-epoch loss histories, filled in by the training loop.
train_scores, val_scores = [], []

# Text log of per-epoch metrics; 'w+' truncates any existing file.
log = open('log_PS_LSTM_S2S_speed1-5.txt', 'w+')

In [None]:
# Train for 100 epochs. Every epoch runs one pass over `train`, then one
# gradient-free pass over `val` that also accumulates the ADE / FDE / AIOU /
# FIOU metrics from `utils`, and finally steps the plateau scheduler.
device = args.device

for epoch in range(1, 101):
    start = time.time()
    avg_epoch_train_loss = 0
    avg_epoch_val_loss = 0
    ade = 0
    fde = 0
    aiou = 0
    fiou = 0

    # ---- training pass ----
    # Explicitly (re-)enable training mode: the validation pass below switches
    # the model to eval mode.
    net.train()
    counter = 0
    for scenes, obs, target, obs_p, target_p in train:
        counter += 1
        scenes = scenes.to(device)
        obs = obs.type(torch.float32).to(device=device)
        target = target.type(torch.float32).to(device=device)
        obs_p = obs_p.type(torch.float32).to(device=device)

        net.zero_grad()
        # PS_LSTM.forward requires (scenes, speed, pos); the position input
        # was previously omitted, which raises a TypeError.
        # NOTE(review): obs_p is assumed to be the observed positions that
        # correspond to `pos` - confirm against the data loader.
        preds = net(scenes, obs, obs_p)
        train_loss = criterion(preds, target)/10
        train_loss.backward()
        optimizer.step()
        avg_epoch_train_loss += float(train_loss)

    avg_epoch_train_loss /= counter
    train_scores.append(avg_epoch_train_loss)

    # ---- validation pass ----
    # eval() keeps BatchNorm running statistics frozen; torch.no_grad() alone
    # does not stop them from being updated.
    net.eval()
    counter = 0
    for scenes, obs, target, obs_p, target_p in val:
        counter += 1
        scenes = scenes.to(device)
        obs = obs.type(torch.float32).to(device=device)
        target = target.type(torch.float32).to(device=device)
        obs_p = obs_p.type(torch.float32).to(device=device)
        target_p = target_p.type(torch.float32).to(device=device)

        with torch.no_grad():
            preds = net(scenes, obs, obs_p)
            val_loss = criterion(preds, target)/10
            # Convert the predictions with utils.speed2pos before scoring
            # them against target_p.
            preds_p = utils.speed2pos(preds, obs_p, 4, device)
            ade += float(utils.ADE_c(preds_p, target_p))
            fde += float(utils.FDE_c(preds_p, target_p))
            aiou += float(utils.AIOU(preds_p, target_p))
            fiou += float(utils.FIOU(preds_p, target_p))

        avg_epoch_val_loss += float(val_loss)

    avg_epoch_val_loss /= counter
    val_scores.append(avg_epoch_val_loss)
    ade /= counter
    fde /= counter
    aiou /= counter
    fiou /= counter

    # Drive the scheduler with the epoch average, not the loss of whichever
    # validation batch happened to come last.
    scheduler.step(avg_epoch_val_loss)

    print('e:', epoch, ' | t_loss: %.4f'% avg_epoch_train_loss, ' | v_loss: %.4f'% avg_epoch_val_loss, 
          '| ade: %.4f'% ade, '| fde: %.4f'% fde, '| aiou: %.4f'% aiou, '| fiou: %.4f'% fiou, '| t:%.4f'%(time.time()-start))
    log.write("E: "+str(epoch)+" | train_loss: "+str(avg_epoch_train_loss)+" | val_loss: "+str(avg_epoch_val_loss)+"| ade: "+str(ade)+"| fde: "+str(fde)+"| aiou: "+str(aiou)+"| fiou: "+str(fiou)+"\n")
    # The handle stays open across cells, so flush after every epoch to keep
    # the file current if the kernel is interrupted.
    log.flush()

In [None]:
# Evaluate the model on the `test` loader: mean MSE loss plus the ADE / FDE /
# AIOU / FIOU metrics from `utils`, averaged over batches.
# NOTE: unlike the training cell, the loss here is not divided by 10, so the
# printed 'val_loss' is on a different scale from the per-epoch values.
device = args.device

avg_epoch_val_loss = 0
ade = 0
fde = 0
aiou = 0
fiou = 0
counter = 0

# Evaluation mode keeps BatchNorm running statistics frozen during the pass.
net.eval()
for scenes, obs, target, obs_p, target_p in test:
    counter += 1
    scenes = scenes.to(device)
    obs = obs.type(torch.float32).to(device=device)
    target = target.type(torch.float32).to(device=device)
    obs_p = obs_p.type(torch.float32).to(device=device)
    target_p = target_p.type(torch.float32).to(device=device)

    with torch.no_grad():
        # PS_LSTM.forward requires (scenes, speed, pos); the position input
        # was previously omitted, which raises a TypeError.
        # NOTE(review): obs_p is assumed to be the observed positions that
        # correspond to `pos` - confirm against the data loader.
        preds = net(scenes, obs, obs_p)
        val_loss = criterion(preds, target)
        # obs_p / target_p are already on the device, so no further .to()
        # calls are needed here.
        preds_p = utils.speed2pos(preds, obs_p, 4, device)
        ade += float(utils.ADE_c(preds_p, target_p))
        fde += float(utils.FDE_c(preds_p, target_p))
        aiou += float(utils.AIOU(preds_p, target_p))
        fiou += float(utils.FIOU(preds_p, target_p))

    avg_epoch_val_loss += float(val_loss)

avg_epoch_val_loss /= counter
ade /= counter
fde /= counter
aiou /= counter
fiou /= counter
print('val_loss: %.4f'% avg_epoch_val_loss, '| ade: %.4f'% ade, '| fde: %.4f'% fde, 
      '| aiou: %.4f'% aiou, '| fiou: %.4f'% fiou)

In [None]:
#torch.save(net.state_dict(), '/data/smailait-data/models/PS-LSTM-speed/PS_LSTM_speed_s2s_100epochs.pkl') 