In [1]:
import numpy as np
import pandas as pd
import datetime

%matplotlib inline
import matplotlib.pyplot as plt

import torch
import torch.nn as nn

import random


# One fixed seed shared by every random number generator used in this notebook.
SEED = 42

# Run on the GPU whenever PyTorch can see one, otherwise fall back to the CPU.
cuda_available = torch.cuda.is_available()
DEVICE = "cuda" if cuda_available else "cpu"
print(f"Using device: {DEVICE}")

if torch.backends.cudnn.is_available():
    torch.backends.cudnn.enabled = True

# Seed Python's RNG, NumPy's legacy global RNG and PyTorch (CPU + every CUDA device).
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Ask cuDNN for deterministic kernels when a GPU is in use.
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True

Using device: cuda


# Preparing data

In [2]:
def read_data_with_norm(file_path_X, file_path_y, seq_length):
    """Load comma-separated features and labels and group the feature rows into sequences.

    NOTE(review): despite the name, no normalisation is performed here; the
    only transformation applied is shifting the labels by -1.

    Args:
        file_path_X: Path to a header-less, comma-separated file of feature rows.
        file_path_y: Path to a header-less, comma-separated file of labels.
        seq_length: Number of consecutive feature rows that form one sequence.

    Returns:
        A tuple ``(X_seq, y_data)`` where ``X_seq`` has shape
        ``(n_rows // seq_length, seq_length, n_columns)`` and ``y_data`` is the
        label array exactly as read from the file, minus one.

    Raises:
        ValueError: If ``seq_length`` is not positive or the number of feature
            rows is not a multiple of ``seq_length``.
    """
    X_data = np.array(pd.read_csv(file_path_X, header=None))
    y_data = np.array(pd.read_csv(file_path_y, header=None))

    # PyTorch classification losses expect class indices starting at 0, so the
    # labels are shifted down by one (assumes the file stores 1-based labels).
    y_data = y_data - 1

    if seq_length <= 0:
        raise ValueError("seq_length must be a positive integer, got %r" % (seq_length,))

    n_rows = X_data.shape[0]
    # A float block count (n_rows / seq_length) can slip through np.split's
    # divisibility check and silently produce sequences of the wrong length,
    # so the check is done explicitly with integer arithmetic.
    if n_rows % seq_length != 0:
        raise ValueError(
            "number of rows (%d) is not a multiple of seq_length (%d)" % (n_rows, seq_length)
        )

    X_seq = X_data.reshape(n_rows // seq_length, seq_length, X_data.shape[1])

    return X_seq, y_data

In [3]:
def read_data(file_path_X, file_path_y, seq_length):
    """Load tab-separated features and labels and group the feature rows into sequences.

    Args:
        file_path_X: Path to a header-less, tab-separated file of feature rows
            (parsed as float32).
        file_path_y: Path to a header-less, tab-separated file of integer labels.
        seq_length: Number of consecutive feature rows that form one sequence.

    Returns:
        A tuple ``(X_seq, y_data)`` where ``X_seq`` is a float32 array of shape
        ``(n_rows // seq_length, seq_length, n_columns)`` and ``y_data`` is the
        integer label array exactly as read from the file.

    Raises:
        ValueError: If ``seq_length`` is not positive or the number of feature
            rows is not a multiple of ``seq_length``.
    """
    X_data = np.array(pd.read_csv(file_path_X, sep="\t", header=None, dtype=np.float32))
    y_data = np.array(pd.read_csv(file_path_y, sep="\t", header=None, dtype=np.int_))

    if seq_length <= 0:
        raise ValueError("seq_length must be a positive integer, got %r" % (seq_length,))

    n_rows = X_data.shape[0]
    # A float block count (n_rows / seq_length) can slip through np.split's
    # divisibility check and silently produce sequences of the wrong length,
    # so the check is done explicitly with integer arithmetic.
    if n_rows % seq_length != 0:
        raise ValueError(
            "number of rows (%d) is not a multiple of seq_length (%d)" % (n_rows, seq_length)
        )

    X_seq = X_data.reshape(n_rows // seq_length, seq_length, X_data.shape[1])

    return X_seq, y_data

In [4]:
# Number of consecutive rows in the poses file that make up one sequence.
SEQ_LENGTH = 16
# Share of the sequences (taken from the end of the file) held out for validation.
VAL_FRACTION = 0.2

X_all, y_all = read_data("data/shortened_shuffled_poses.csv", "data/shortened_shuffled_labels.csv", SEQ_LENGTH)
assert X_all.shape[0] == y_all.shape[0], "number of sequences and number of labels differ"

val_size = int(y_all.shape[0] * VAL_FRACTION)
# Split on a forward index instead of slicing with -val_size: when val_size is 0,
# `x[-0:]` is the whole array and `x[:-0]` is empty, which would swap the splits.
n_train = y_all.shape[0] - val_size
X_train, y_train = X_all[:n_train], y_all[:n_train]
X_val, y_val = X_all[n_train:], y_all[n_train:]

In [5]:
# Wrap the NumPy splits as tensors. Labels are flattened with reshape(-1) rather
# than a bare squeeze(): squeeze() would collapse a single-sample split to a 0-d
# tensor, which TensorDataset cannot index.
train_dataset = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.float32),
                                               torch.tensor(y_train, dtype=torch.long).reshape(-1))

val_dataset = torch.utils.data.TensorDataset(torch.tensor(X_val, dtype=torch.float32),
                                             torch.tensor(y_val, dtype=torch.long).reshape(-1))

# NN models

In [6]:
class RnnClassifier(nn.Module):
    """Sequence classifier: a stacked ``nn.RNN`` followed by a two-layer fully connected head.

    The recurrent layer is stored as ``_lstm`` and the constructor arguments keep
    their ``lstm`` prefix to match the sibling classifiers; renaming the
    attribute would change the ``state_dict`` keys of saved checkpoints.

    Args:
        input_dim: Number of features per time step.
        n_classes: Number of output classes (size of the returned logits).
        lstm_hidden_dim: Hidden size of the recurrent layer.
        fc_hidden_dim: Width of the hidden fully connected layer.
        n_lstm_layers: Number of stacked recurrent layers.
    """

    def __init__(self, input_dim, n_classes, lstm_hidden_dim=256, fc_hidden_dim=256, n_lstm_layers=2):
        super().__init__()

        self._lstm = nn.RNN(input_size=input_dim,
                            hidden_size=lstm_hidden_dim,
                            num_layers=n_lstm_layers,
                            batch_first=True)

        self._fc = nn.Sequential(nn.Linear(lstm_hidden_dim, fc_hidden_dim),
                                 nn.ReLU(),
                                 nn.Linear(fc_hidden_dim, n_classes))

    def forward(self, x):
        """Return class logits computed from the recurrent output at the last time step.

        ``x`` is indexed as (batch, time, feature) because the recurrent layer
        is built with ``batch_first=True``.
        """
        # Call the sub-modules (not their .forward) so registered hooks run.
        recurrent_output, _ = self._lstm(x)
        last_step = recurrent_output[:, -1, :]
        return self._fc(last_step)

In [7]:
class LstmClassifier(nn.Module):
    """Sequence classifier: a stacked ``nn.LSTM`` followed by a two-layer fully connected head.

    Args:
        input_dim: Number of features per time step.
        n_classes: Number of output classes (size of the returned logits).
        lstm_hidden_dim: Hidden size of the LSTM.
        fc_hidden_dim: Width of the hidden fully connected layer.
        n_lstm_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_dim, n_classes, lstm_hidden_dim=256, fc_hidden_dim=256, n_lstm_layers=2):
        super().__init__()

        self._lstm = nn.LSTM(input_size=input_dim,
                             hidden_size=lstm_hidden_dim,
                             num_layers=n_lstm_layers,
                             batch_first=True)

        self._fc = nn.Sequential(nn.Linear(lstm_hidden_dim, fc_hidden_dim),
                                 nn.ReLU(),
                                 nn.Linear(fc_hidden_dim, n_classes))

    def forward(self, x):
        """Return class logits computed from the LSTM output at the last time step.

        ``x`` is indexed as (batch, time, feature) because the LSTM is built
        with ``batch_first=True``.
        """
        # Call the sub-modules (not their .forward) so registered hooks run.
        recurrent_output, _ = self._lstm(x)
        last_step = recurrent_output[:, -1, :]
        return self._fc(last_step)

In [8]:
class GruClassifier(nn.Module):
    """Sequence classifier: a stacked ``nn.GRU`` followed by a two-layer fully connected head.

    The recurrent layer is stored as ``_lstm`` and the constructor arguments keep
    their ``lstm`` prefix to match the sibling classifiers; renaming the
    attribute would change the ``state_dict`` keys of saved checkpoints.

    Args:
        input_dim: Number of features per time step.
        n_classes: Number of output classes (size of the returned logits).
        lstm_hidden_dim: Hidden size of the GRU.
        fc_hidden_dim: Width of the hidden fully connected layer.
        n_lstm_layers: Number of stacked GRU layers.
    """

    def __init__(self, input_dim, n_classes, lstm_hidden_dim=256, fc_hidden_dim=256, n_lstm_layers=2):
        super().__init__()

        self._lstm = nn.GRU(input_size=input_dim,
                            hidden_size=lstm_hidden_dim,
                            num_layers=n_lstm_layers,
                            batch_first=True)

        self._fc = nn.Sequential(nn.Linear(lstm_hidden_dim, fc_hidden_dim),
                                 nn.ReLU(),
                                 nn.Linear(fc_hidden_dim, n_classes))

    def forward(self, x):
        """Return class logits computed from the GRU output at the last time step.

        ``x`` is indexed as (batch, time, feature) because the GRU is built
        with ``batch_first=True``.
        """
        # Call the sub-modules (not their .forward) so registered hooks run.
        recurrent_output, _ = self._lstm(x)
        last_step = recurrent_output[:, -1, :]
        return self._fc(last_step)

# Training utils

In [9]:
def run_epoch(model, optimizer, criterion, batches, phase='train'):
    """Run one pass over ``batches`` and return the mean loss and the accuracy.

    Args:
        model: Module mapping an input batch to per-class scores of shape
            (batch, n_classes).
        optimizer: Optimizer stepped after every batch when ``phase == 'train'``;
            not used in any other phase.
        criterion: Loss callable taking ``(predictions, targets)``.
        batches: Iterable of ``(X_batch, y_batch)`` tensor pairs.
        phase: ``'train'`` enables gradients and weight updates; any other value
            puts the model in eval mode and only measures.

    Returns:
        ``(epoch_loss, epoch_accuracy)``: the sample-weighted mean of the batch
        losses and the fraction of samples whose arg-max prediction equals the
        target.

    Raises:
        ValueError: If ``batches`` yields no samples.
    """
    is_train = phase == 'train'
    model.train(is_train)  # train(False) is equivalent to eval()

    total_loss = 0.0
    n_correct = 0
    n_samples = 0

    for X_batch, y_batch in batches:
        X_batch = X_batch.to(DEVICE)
        y_batch = y_batch.to(DEVICE)

        with torch.set_grad_enabled(is_train):
            # Call the modules (not .forward) so registered hooks run.
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)

        if is_train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        batch_len = y_batch.shape[0]
        # Weight by batch size so a smaller final batch does not skew the mean
        # (assumes the criterion returns a per-batch mean).
        total_loss += loss.item() * batch_len
        n_correct += (torch.argmax(y_pred, dim=1) == y_batch).sum().item()
        n_samples += batch_len

    if n_samples == 0:
        raise ValueError("run_epoch received no samples for phase %r" % (phase,))

    return total_loss / n_samples, n_correct / n_samples


def train_model(model, optimizer, criterion, n_epoch, batch_size, train_dataset, val_dataset, backup_name,
                num_workers=4):
    """Train ``model`` for ``n_epoch`` epochs, checkpointing the weights with the lowest validation loss.

    Args:
        model: Module to train.
        optimizer: Optimizer updating ``model``'s parameters.
        criterion: Loss callable taking ``(predictions, targets)``.
        n_epoch: Number of passes over the training set.
        batch_size: Batch size used by both data loaders.
        train_dataset: Dataset iterated (shuffled) for the training phase.
        val_dataset: Dataset iterated (in order) for the validation phase.
        backup_name: File path where the best ``state_dict`` is saved.
        num_workers: Worker processes per ``DataLoader`` (default 4).

    Returns:
        ``(train_losses, train_accuracies, val_losses, val_accuracies,
        best_val_loss, best_val_accuracy)``. The first four are per-epoch lists.
        ``best_val_accuracy`` is the validation accuracy of the epoch with the
        lowest validation loss; it stays 0.0 (and ``best_val_loss`` stays inf)
        if no epoch ever improved.
    """
    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []
    best_val_loss = np.inf
    # An accuracy lies in [0, 1]; start from 0.0 rather than the impossible inf.
    best_val_accuracy = 0.0

    train_batches = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                                num_workers=num_workers, shuffle=True)
    val_batches = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size,
                                              num_workers=num_workers, shuffle=False)

    for epoch in range(n_epoch):
        train_loss, train_accuracy = run_epoch(model, optimizer, criterion, train_batches, phase='train')
        val_loss, val_accuracy = run_epoch(model, optimizer, criterion, val_batches, phase='val')

        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)

        # Keep only the weights of the best epoch (by validation loss) on disk.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_val_accuracy = val_accuracy
            torch.save(model.state_dict(), backup_name)

        print("Epoch: " + str(epoch))
        print("Train loss: " + str(train_loss) + ", accuracy: " + str(train_accuracy))
        print("Val loss: " + str(val_loss) + ", accuracy: " + str(val_accuracy) + "\n\n")

    return train_losses, train_accuracies, val_losses, val_accuracies, best_val_loss, best_val_accuracy

# Training process

In [10]:
# Number of output classes passed to every classifier below.
N_CLASSES = 8
# Epoch budget used by the training runs that pass n_epoch=N_EPOCHS.
N_EPOCHS = 500
# Size of the third axis of the training array, used as each model's input_dim.
INPUT_DIM = X_train.shape[2]

## RNN

In [12]:
# Train the vanilla-RNN classifier and record its learning curves.
model = RnnClassifier(INPUT_DIM, N_CLASSES)
model = model.to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

start_time = datetime.datetime.now()

# Pass the optimizer/criterion built above instead of constructing a second,
# identical pair inline (which left the objects above unused).
# NOTE(review): n_epoch is hard-coded to 2 here while the other runs use
# N_EPOCHS; confirm the short run is intentional.
train_losses, train_accuracies, val_losses, val_accuracies, best_val_loss, best_val_accuracy = train_model(
    model,
    optimizer=optimizer,
    criterion=criterion,
    n_epoch=2,
    batch_size=500,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    backup_name="rnn_action_classifier.pth.tar")

finish_time = datetime.datetime.now()

# Persist the per-epoch curves so they can be plotted without retraining.
training = pd.DataFrame({"train_losses":train_losses, "train_accuracies":train_accuracies, "val_losses":val_losses, "val_accuracies":val_accuracies})
training.to_csv("rnn_training_vals.csv",  sep="\t", index=False)
print("best_val_loss", best_val_loss)
print("best_val_accuracy", best_val_accuracy)
print("Time", finish_time - start_time)

Epoch: 0
Train loss: 1.8661765122951988, accuracy: 0.2661748972352047
Val loss: 1.9214650738693138, accuracy: 0.28835323271006985


Epoch: 1
Train loss: 1.751128340527324, accuracy: 0.32355425418097866
Val loss: 2.192229921255436, accuracy: 0.23901779680108132


best_val_loss 1.9214650738693138
best_val_accuracy 0.28835323271006985
Time 0:00:06.243883


## LSTM

In [14]:
# Train the LSTM classifier and record its learning curves.
model = LstmClassifier(INPUT_DIM, N_CLASSES)
model = model.to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

start_time = datetime.datetime.now()

# Pass the optimizer/criterion built above instead of constructing a second,
# identical pair inline (which left the objects above unused).
train_losses, train_accuracies, val_losses, val_accuracies, best_val_loss, best_val_accuracy = train_model(
    model,
    optimizer=optimizer,
    criterion=criterion,
    n_epoch=N_EPOCHS,
    batch_size=500,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    backup_name="lstm_action_classifier.pth.tar")

finish_time = datetime.datetime.now()

# Persist the per-epoch curves so they can be plotted without retraining.
training = pd.DataFrame({"train_losses":train_losses, "train_accuracies":train_accuracies, "val_losses":val_losses, "val_accuracies":val_accuracies})
training.to_csv("lstm_training_vals.csv",  sep="\t", index=False)
print("best_val_loss", best_val_loss)
print("best_val_accuracy", best_val_accuracy)
print("Time", finish_time - start_time)

0.9904273889295568
Val loss: 0.44594348169817905, accuracy: 0.9240820004505519


Epoch: 352
Train loss: 0.03261007779187713, accuracy: 0.9892448899149727
Val loss: 0.45375884320451804, accuracy: 0.9148456859653075


Epoch: 353
Train loss: 0.050034315885665606, accuracy: 0.9830508474576272
Val loss: 0.480779976706753, accuracy: 0.9051588195539536


Epoch: 354
Train loss: 0.059932357225038695, accuracy: 0.9795033504138747
Val loss: 0.4596149029428826, accuracy: 0.9119170984455959


Epoch: 355
Train loss: 0.02935970765884895, accuracy: 0.9900895320682471
Val loss: 0.4418626556487254, accuracy: 0.9200270331155666


Epoch: 356
Train loss: 0.03540639229464104, accuracy: 0.9886254856692381
Val loss: 0.4637381630817709, accuracy: 0.9089885109258842


Epoch: 357
Train loss: 0.05962705768460633, accuracy: 0.979052874598795
Val loss: 0.4376922966687898, accuracy: 0.9200270331155666


Epoch: 358
Train loss: 0.05456211142780223, accuracy: 0.9797848977982995
Val loss: 0.41506156706009717, accuracy: 

## GRU n_lstm_layers=1

In [None]:
# Train a single-layer GRU classifier and record its learning curves.
model = GruClassifier(INPUT_DIM, N_CLASSES, n_lstm_layers=1)
model = model.to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

start_time = datetime.datetime.now()

# Pass the optimizer/criterion built above instead of constructing a second,
# identical pair inline (which left the objects above unused).
train_losses, train_accuracies, val_losses, val_accuracies, best_val_loss, best_val_accuracy = train_model(
    model,
    optimizer=optimizer,
    criterion=criterion,
    n_epoch=N_EPOCHS,
    batch_size=500,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    backup_name="gru_n_lstm_layers_1_action_classifier.pth.tar")

finish_time = datetime.datetime.now()

# Persist the per-epoch curves so they can be plotted without retraining.
training = pd.DataFrame({"train_losses":train_losses, "train_accuracies":train_accuracies, "val_losses":val_losses, "val_accuracies":val_accuracies})
training.to_csv("gru_n_lstm_layers_1_training_vals.csv",  sep="\t", index=False)
print("best_val_loss", best_val_loss)
print("best_val_accuracy", best_val_accuracy)
print("Time", finish_time - start_time)

## GRU n_lstm_layers=2

In [15]:
# Train the GRU classifier with its default layer count (n_lstm_layers=2) and
# record its learning curves.
model = GruClassifier(INPUT_DIM, N_CLASSES)
model = model.to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

start_time = datetime.datetime.now()

# Pass the optimizer/criterion built above instead of constructing a second,
# identical pair inline (which left the objects above unused).
train_losses, train_accuracies, val_losses, val_accuracies, best_val_loss, best_val_accuracy = train_model(
    model,
    optimizer=optimizer,
    criterion=criterion,
    n_epoch=N_EPOCHS,
    batch_size=500,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    backup_name="gru_n_lstm_layers_2_action_classifier.pth.tar")

finish_time = datetime.datetime.now()

# Persist the per-epoch curves so they can be plotted without retraining.
training = pd.DataFrame({"train_losses":train_losses, "train_accuracies":train_accuracies, "val_losses":val_losses, "val_accuracies":val_accuracies})
training.to_csv("gru_n_lstm_layers_2_training_vals.csv",  sep="\t", index=False)
print("best_val_loss", best_val_loss)
print("best_val_accuracy", best_val_accuracy)
print("Time", finish_time - start_time)

y: 0.9856973928712203
Val loss: 0.43324880406351124, accuracy: 0.9116918224825411


Epoch: 352
Train loss: 0.029991342070863784, accuracy: 0.9894138183456276
Val loss: 0.4007655534222739, accuracy: 0.9270105879702636


Epoch: 353
Train loss: 0.027370430570127453, accuracy: 0.990258460498902
Val loss: 0.4057163463221071, accuracy: 0.926560036044154


Epoch: 354
Train loss: 0.0488030005684189, accuracy: 0.9820935863505827
Val loss: 0.4729400363946503, accuracy: 0.9089885109258842


Epoch: 355
Train loss: 0.08176265267700458, accuracy: 0.9712258573117856
Val loss: 0.4317663765404777, accuracy: 0.909213786888939


Epoch: 356
Train loss: 0.1224828947989332, accuracy: 0.9566980122754659
Val loss: 0.4316517049918762, accuracy: 0.9069610272583916


Epoch: 357
Train loss: 0.051798289970026654, accuracy: 0.9827129905963173
Val loss: 0.39620468913367485, accuracy: 0.9222797927461139


Epoch: 358
Train loss: 0.03815605468786651, accuracy: 0.987499296131539
Val loss: 0.4244965332656542, accuracy: 0

## GRU n_lstm_layers=3

In [None]:
# Train a three-layer GRU classifier and record its learning curves.
model = GruClassifier(INPUT_DIM, N_CLASSES, n_lstm_layers=3)
model = model.to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

start_time = datetime.datetime.now()

# Pass the optimizer/criterion built above instead of constructing a second,
# identical pair inline (which left the objects above unused).
train_losses, train_accuracies, val_losses, val_accuracies, best_val_loss, best_val_accuracy = train_model(
    model,
    optimizer=optimizer,
    criterion=criterion,
    n_epoch=N_EPOCHS,
    batch_size=500,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    backup_name="gru_n_lstm_layers_3_action_classifier.pth.tar")

finish_time = datetime.datetime.now()

# Persist the per-epoch curves so they can be plotted without retraining.
training = pd.DataFrame({"train_losses":train_losses, "train_accuracies":train_accuracies, "val_losses":val_losses, "val_accuracies":val_accuracies})
training.to_csv("gru_n_lstm_layers_3_training_vals.csv",  sep="\t", index=False)
print("best_val_loss", best_val_loss)
print("best_val_accuracy", best_val_accuracy)
print("Time", finish_time - start_time)

## Load and Inference

In [7]:
# Rebuild the LSTM architecture and restore the weights saved during LSTM training.
model = LstmClassifier(INPUT_DIM, N_CLASSES)
model = model.to(DEVICE)
# map_location remaps the stored tensors onto the current device, so a checkpoint
# written on a GPU can still be loaded on a CPU-only machine.
model.load_state_dict(torch.load("lstm_action_classifier.pth.tar", map_location=DEVICE))

<All keys matched successfully>

In [8]:
def run_inference_on_sequence(model, sequence, verbose=True):
    """Predict class indices for ``sequence`` with ``model`` in eval mode.

    Args:
        model: Module returning per-class scores of shape (batch, n_classes).
        sequence: Input tensor; it is moved to ``DEVICE`` and passed to the
            model unchanged, so it must already carry a leading batch dimension
            (the arg-max is taken over dim 1).
        verbose: When True (the default) the raw model output is printed before
            the predicted indices are returned.

    Returns:
        A tensor holding the arg-max class index for every item in the batch.
    """
    model.eval()
    X_batch = sequence.to(DEVICE)

    with torch.no_grad():
        # Call the module (not .forward) so registered hooks run.
        y_pred = model(X_batch)

    if verbose:
        print(y_pred)
    return torch.argmax(y_pred, dim=1)