<a href="https://colab.research.google.com/github/zhiyongc/TRAFFIX/blob/master/Run_Demo_Train_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [51]:
import torch
import torch.nn as nn
import torch.utils.data as utils
import torch.nn.functional as F
from torch.autograd import Variable
from torch.nn import Parameter, ParameterList
print('PyTorch Version:', torch.__version__)
print('Using GPU:', torch.cuda.is_available())


PyTorch Version: 1.3.1
Using GPU: True


In [0]:
dtype = torch.float32

device = torch.device("cuda:0")

torch.cuda.set_device(0)

In [0]:
import pandas as pd
import numpy as np
import time
import io
import copy

In [0]:
# df = pd.read_csv('https://github.com/zhiyongc/TRAFFIX/blob/master/2015_third_week.csv')

In [0]:
from google.colab import files
uploaded = files.upload()

Saving 2015_third_week.csv to 2015_third_week.csv


In [0]:
speed_matrix = pd.read_csv(io.StringIO(uploaded['2015_third_week.csv'].decode('utf-8')),index_col=0)

In [0]:
speed_matrix

Unnamed: 0_level_0,d005es16583,d005es16640,d005es16661,d005es16701,d005es16732,d005es16756,d005es16802,d005es16831,d005es16885,d005es16919
stamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-01-15 00:00:00,66,64,67,57,51,67,57,64,56,63
2015-01-15 00:05:00,65,64,67,62,54,66,61,61,64,67
2015-01-15 00:10:00,65,63,66,62,61,66,59,63,63,62
2015-01-15 00:15:00,67,63,68,61,62,71,61,60,63,65
2015-01-15 00:20:00,63,59,65,59,58,65,58,59,64,64
...,...,...,...,...,...,...,...,...,...,...
2015-01-21 23:35:00,66,64,65,54,61,64,67,65,62,66
2015-01-21 23:40:00,65,66,65,59,61,65,65,63,60,67
2015-01-21 23:45:00,66,65,65,59,59,63,65,62,62,67
2015-01-21 23:50:00,66,63,66,60,56,66,67,68,65,67


In [0]:
def PrepareDataset(speed_matrix, BATCH_SIZE = 40, seq_len = 10, pred_len = 1, train_propotion = 0.7, valid_propotion = 0.2):
    """ Prepare training and testing datasets and dataloaders.
    
    Convert speed/volume/occupancy matrix to training and testing dataset. 
    The vertical axis of speed_matrix is the time axis and the horizontal axis 
    is the spatial axis.
    
    Args:
        speed_matrix: a Matrix containing spatial-temporal speed data for a network
        seq_len: length of input sequence
        pred_len: length of predicted sequence
    Returns:
        Training dataloader
        Testing dataloader
    """
    time_len = speed_matrix.shape[0]
    
    max_speed = speed_matrix.max().max()
    speed_matrix =  speed_matrix / max_speed
    
    speed_sequences, speed_labels = [], []
    for i in range(time_len - seq_len - pred_len):
        speed_sequences.append(speed_matrix.iloc[i:i+seq_len].values)
        speed_labels.append(speed_matrix.iloc[i+seq_len:i+seq_len+pred_len].values)
    speed_sequences, speed_labels = np.asarray(speed_sequences), np.asarray(speed_labels)
    
    # shuffle and split the dataset to training and testing datasets
    sample_size = speed_sequences.shape[0]
    index = np.arange(sample_size, dtype = int)
    np.random.shuffle(index)
    
    train_index = int(np.floor(sample_size * train_propotion))
    valid_index = int(np.floor(sample_size * ( train_propotion + valid_propotion)))
    
    train_data, train_label = speed_sequences[:train_index], speed_labels[:train_index]
    valid_data, valid_label = speed_sequences[train_index:valid_index], speed_labels[train_index:valid_index]
    test_data, test_label = speed_sequences[valid_index:], speed_labels[valid_index:]
    
    train_data, train_label = torch.Tensor(train_data), torch.Tensor(train_label)
    valid_data, valid_label = torch.Tensor(valid_data), torch.Tensor(valid_label)
    test_data, test_label = torch.Tensor(test_data), torch.Tensor(test_label)
    
    train_dataset = utils.TensorDataset(train_data, train_label)
    valid_dataset = utils.TensorDataset(valid_data, valid_label)
    test_dataset = utils.TensorDataset(test_data, test_label)
    
    train_dataloader = utils.DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle=True, drop_last = True)
    valid_dataloader = utils.DataLoader(valid_dataset, batch_size = BATCH_SIZE, shuffle=True, drop_last = True)
    test_dataloader = utils.DataLoader(test_dataset, batch_size = BATCH_SIZE, shuffle=True, drop_last = True)
    
    return train_dataloader, valid_dataloader, test_dataloader, max_speed

In [0]:
starttime = time.time()
train_dataloader, valid_dataloader, test_dataloader, max_speed = PrepareDataset(speed_matrix)
print( time.time() - starttime )

0.33252739906311035


In [0]:
class LSTM(nn.Module):
    def __init__(self, n, output_last = True):
        """
        cell_size is the size of cell_state.
        hidden_size is the size of hidden_state, or say the output_state of each step
        """
        super(LSTM, self).__init__()
        
        self.n = n
        self.fl = nn.Linear(2 * n, n)
        self.il = nn.Linear(2 * n, n)
        self.ol = nn.Linear(2 * n, n)
        self.Cl = nn.Linear(2 * n, n)
        
        self.output_last = output_last
        
    def step(self, input, Hidden_State, Cell_State):
        combined = torch.cat((input, Hidden_State), 1)
        f = F.sigmoid(self.fl(combined))
        i = F.sigmoid(self.il(combined))
        o = F.sigmoid(self.ol(combined))
        C = F.tanh(self.Cl(combined))
        Cell_State = f * Cell_State + i * C
        Hidden_State = o * F.tanh(Cell_State)
        
        return Hidden_State, Cell_State
    
    def forward(self, inputs):
        batch_size = inputs.size(0)
        time_step = inputs.size(1)
        spatial_size = inputs.shape[2]
        Hidden_State, Cell_State = self.initHidden(batch_size, spatial_size)
        
        if self.output_last:
            for i in range(time_step):
                Hidden_State, Cell_State = self.step(torch.squeeze(inputs[:,i:i+1,:]), Hidden_State, Cell_State)  
            return Hidden_State
        else:
            outputs = None
            for i in range(time_step):
                Hidden_State, Cell_State = self.step(torch.squeeze(inputs[:,i:i+1,:]), Hidden_State, Cell_State)  
                if outputs is None:
                    outputs = Hidden_State.unsqueeze(1)
                else:
                    outputs = torch.cat((outputs, Hidden_State.unsqueeze(1)), 1)
            return outputs
    
    def initHidden(self, batch_size, spatial_size):
        Hidden_State = torch.zeros((batch_size, spatial_size), dtype = dtype, device = device)
        Cell_State = torch.zeros((batch_size, spatial_size), dtype = dtype, device = device)
        return Hidden_State, Cell_State
       

In [0]:
def TrainModel(model, train_dataloader, valid_dataloader, learning_rate = 1e-5, optm = 'Adam', num_epochs = 300, patience = 5, min_delta = 0.00001):
    
    inputs, labels = next(iter(train_dataloader))
    [batch_size, step_size, fea_size] = inputs.size()
    input_dim = fea_size
    hidden_dim = fea_size
    output_dim = fea_size
    
    model.cuda()
    
    loss_MSE = torch.nn.MSELoss()
    loss_L1 = torch.nn.L1Loss()

    learning_rate = learning_rate
    if optm == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
    elif optm == 'Adadelta':
        optimizer = torch.optim.Adadelta(model.parameters(), lr = learning_rate)
    elif optm == 'RMSprop':
        optimizer = torch.optim.RMSprop(model.parameters(), lr = learning_rate)
        
    use_gpu = torch.cuda.is_available()
    
    losses_train = []
    losses_valid = []
    losses_epochs_train = []
    losses_epochs_valid = []
    
    train_time_list = []
    valid_time_list = []
    
    cur_time = time.time()
    pre_time = time.time()
    
    # Variables for Early Stopping
    is_best_model = 0
    patient_epoch = 0
    
    sub_epoch = 1
    
    for epoch in range(0, num_epochs):

        losses_epoch_train = []
        losses_epoch_valid = []

        train_start = time.time()
        
        for data in train_dataloader:
            inputs, labels = data

            if inputs.shape[0] != batch_size:
                continue

            if use_gpu:
                inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
            else: 
                inputs, labels = Variable(inputs), Variable(labels)
                
            model.zero_grad()

            outputs = model(inputs)
            
            if len(labels[labels==0]): 
                label_mask = torch.ones_like(labels).cuda()
                label_mask = label_mask * labels
                label_mask[label_mask!=0] = 1
                loss_train = loss_MSE(outputs * label_mask, torch.squeeze(labels)) 
            else:
                loss_train = loss_MSE(outputs, torch.squeeze(labels)) 

            losses_epoch_train.append(loss_train.item())
            
            optimizer.zero_grad()
            
            loss_train.backward()
            
            optimizer.step()
            
        train_end = time.time()
        
        # validation   
        valid_start = time.time()
        
        for data in valid_dataloader:
            
            inputs_val, labels_val = data
            
            if use_gpu:
                inputs_val, labels_val = Variable(inputs_val.cuda()), Variable(labels_val.cuda())
            else: 
                inputs_val, labels_val = Variable(inputs_val), Variable(labels_val)

            outputs_val= model(inputs_val)
            
            if len(labels_val[labels_val==0]): 
                labels_val_mask = torch.ones_like(labels_val).cuda()
                labels_val_mask = labels_val_mask * labels_val
                labels_val_mask[labels_val_mask!=0] = 1

                loss_valid = loss_MSE(outputs_val * labels_val_mask, torch.squeeze(labels_val))
            else:
                loss_valid = loss_MSE(outputs_val , torch.squeeze(labels_val))
            
            losses_epoch_valid.append(loss_valid.item())
        
        valid_end = time.time()
        
        avg_losses_epoch_train = sum(losses_epoch_train) / float(len(losses_epoch_train))
        avg_losses_epoch_valid = sum(losses_epoch_valid) / float(len(losses_epoch_valid))
        losses_epochs_train.append(avg_losses_epoch_train)
        losses_epochs_valid.append(avg_losses_epoch_valid)
        

        # Early Stopping
        if epoch == 0:
            is_best_model = 1
            best_model = copy.deepcopy(model)
            min_loss_epoch_valid = 10000.0
            if avg_losses_epoch_valid < min_loss_epoch_valid:
                min_loss_epoch_valid = avg_losses_epoch_valid
                
#             sub_epoch += 1
        else:
            if min_loss_epoch_valid - avg_losses_epoch_valid > min_delta:
                is_best_model = 1
                best_model = model
                min_loss_epoch_valid = avg_losses_epoch_valid 
                patient_epoch = 0
            else:
                is_best_model = 0
                patient_epoch += 1
                if patient_epoch >= patience:
                    print('Early Stopped at Epoch:', epoch)
                    break
                    
        if (epoch >= 5 and (patient_epoch == 4 or sub_epoch % 50 == 0)) and learning_rate > 1e-5:
            learning_rate = learning_rate / 10
            if optm == 'Adam':
                optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
            elif optm == 'Adadelta':
                optimizer = torch.optim.Adadelta(model.parameters(), lr = learning_rate)
            elif optm == 'RMSprop':
                optimizer = torch.optim.RMSprop(model.parameters(), lr = learning_rate)
            sub_epoch = 1
        else:
            sub_epoch += 1
                        
        
        # Print training parameters
        cur_time = time.time()
        train_time = np.around([train_end - train_start] , decimals=2)
        train_time_list.append(train_time)
        valid_time = np.around([valid_end - valid_start] , decimals=2)
        valid_time_list.append(valid_time)
        
        print('Epoch: {}, train_loss: {}, valid_loss: {}, lr: {}, train_time: {}, valid_time: {}, best model: {}'.format( \
                    epoch, \
                    np.around(avg_losses_epoch_train, decimals=8),\
                    np.around(avg_losses_epoch_valid, decimals=8),\
                    learning_rate,\
                    np.around([train_end - train_start] , decimals=2),\
                    np.around([valid_end - valid_start] , decimals=2),\
                    is_best_model) )
        pre_time = cur_time
    
    train_time_avg = np.mean(np.array(train_time_list))
    valid_time_avg = np.mean(np.array(valid_time_list))
    
    return best_model, [losses_train, losses_valid, losses_epochs_train, losses_epochs_valid, train_time_avg, valid_time_avg]


In [0]:
def TestModel(model, test_dataloader, max_speed):
    
    inputs, labels = next(iter(test_dataloader))
    [batch_size, step_size, fea_size] = inputs.size()

    cur_time = time.time()
    pre_time = time.time()
    
    use_gpu = torch.cuda.is_available()
    
    loss_MSE = torch.nn.MSELoss()
    loss_L1 = torch.nn.L1Loss()
    
    tested_batch = 0
    
    losses_mse = []
    losses_l1 = [] 

    output_list = []
    label_list = []
    
    losses_l1_allsteps = None
    
    for data in test_dataloader:
        inputs, labels = data
        
        if inputs.shape[0] != batch_size:
            continue
    
        if use_gpu:
            inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
        else: 
            inputs, labels = Variable(inputs), Variable(labels)

        outputs = model(inputs)
        
        loss_mse = loss_MSE(outputs, torch.squeeze(labels))
        loss_l1 = loss_L1(outputs, torch.squeeze(labels))
        
        losses_mse.append(loss_mse.cpu().data.numpy())
        losses_l1.append(loss_l1.cpu().data.numpy())
        
        output_list.append(torch.squeeze(outputs).cpu().data.numpy())
        label_list.append(torch.squeeze(labels).cpu().data.numpy())
        
        tested_batch += 1
    
        if tested_batch % 1000 == 0:
            cur_time = time.time()
            print('Tested #: {}, loss_l1: {}, loss_mse: {}, time: {}'.format( \
                  tested_batch * batch_size, \
                  np.around([loss_l1.data[0]], decimals=8), \
                  np.around([loss_mse.data[0]], decimals=8), \
                  np.around([cur_time - pre_time], decimals=8) ) )
            pre_time = cur_time
#     print(losses_l1)
    losses_l1 = np.array(losses_l1)
    losses_mse = np.array(losses_mse)
    output_list = np.array(output_list)
    label_list = np.array(label_list)
    
    non_zero_index = np.nonzero(label_list)
    MAE = np.mean(np.absolute(output_list[non_zero_index] - label_list[non_zero_index])) * max_speed
    RMSE = np.sqrt(np.mean(np.square(output_list[non_zero_index]* max_speed - label_list[non_zero_index]* max_speed)))
    MAPE = np.mean(np.absolute(output_list[non_zero_index] - label_list[non_zero_index])/label_list[non_zero_index]) * 100         
    MAE = np.around(MAE, decimals=3)
    RMSE = np.around(RMSE, decimals=3)
    MAPE = np.around(MAPE, decimals=3)
    print('Tested: MAE: {}, RMSE : {}, MAPE : {} %, '.format( MAE, RMSE, MAPE))
    return [losses_l1, losses_mse, None, MAE, RMSE, MAPE, losses_l1_allsteps]

In [0]:
inputs, labels = next(iter(train_dataloader))
[batch_size, step_size, fea_size] = inputs.size()
[batch_size, step_size, fea_size]

[40, 10, 10]

In [0]:
lstm = LSTM(fea_size, output_last = True).cuda()
lstm, lstm_loss = TrainModel(lstm, train_dataloader, valid_dataloader, learning_rate = 1e-1, optm = 'Adam', num_epochs = 300, patience = 5, min_delta = 0.00001)
lstm_test = TestModel(lstm, test_dataloader, max_speed)



Epoch: 0, train_loss: 0.08013319, valid_loss: 0.01390901, lr: 0.1, train_time: [0.34], valid_time: [0.04], best model: 1
Epoch: 1, train_loss: 0.01005609, valid_loss: 0.00788602, lr: 0.1, train_time: [0.32], valid_time: [0.04], best model: 1
Epoch: 2, train_loss: 0.0064909, valid_loss: 0.00626857, lr: 0.1, train_time: [0.32], valid_time: [0.04], best model: 1
Epoch: 3, train_loss: 0.00554803, valid_loss: 0.00603422, lr: 0.1, train_time: [0.31], valid_time: [0.04], best model: 1
Epoch: 4, train_loss: 0.00504223, valid_loss: 0.00530364, lr: 0.1, train_time: [0.31], valid_time: [0.04], best model: 1
Epoch: 5, train_loss: 0.00482535, valid_loss: 0.00551325, lr: 0.1, train_time: [0.33], valid_time: [0.03], best model: 0
Epoch: 6, train_loss: 0.0046772, valid_loss: 0.0052831, lr: 0.1, train_time: [0.32], valid_time: [0.04], best model: 1
Epoch: 7, train_loss: 0.00442752, valid_loss: 0.0049296, lr: 0.1, train_time: [0.32], valid_time: [0.04], best model: 1
Epoch: 8, train_loss: 0.00420118, va