In [1]:
import numpy as np
import pandas as pd
import datetime

%matplotlib inline
import matplotlib.pyplot as plt

import torch
import torch.nn as nn

import random


# Select the compute device once; the same answer also gates the cuDNN
# determinism flag further down.
_HAS_CUDA = torch.cuda.is_available()
if _HAS_CUDA:
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("Using device: " + DEVICE)
if torch.backends.cudnn.is_available():
    torch.backends.cudnn.enabled = True

SEED = 42

# Seed every random number generator the notebook touches
# (Python, NumPy, PyTorch CPU, PyTorch CUDA) with the same value.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(SEED)
if _HAS_CUDA:
    torch.backends.cudnn.deterministic = True

Using device: cuda


## Preparing data

In [2]:
def read_data_with_norm(file_path_X, file_path_y, seq_length):
    """Load comma-separated features and labels and group the feature rows into sequences.

    Note: despite the function name, no normalisation is applied here; the
    only transformation is the label shift described below.

    Args:
        file_path_X: path of a header-less CSV file with one feature row per line.
        file_path_y: path of a header-less CSV file with the labels.
        seq_length: number of consecutive feature rows that form one sequence.

    Returns:
        Tuple ``(X_seq, y_data)``. ``X_seq`` stacks the feature rows into
        blocks of ``seq_length`` rows, giving shape
        ``(n_rows // seq_length, seq_length, n_columns)``. ``y_data`` holds the
        labels read from the file, each decremented by one.

    Raises:
        ValueError: if ``seq_length`` is not positive or the number of feature
            rows is not an exact multiple of ``seq_length``.
    """
    X_data = np.array(pd.read_csv(file_path_X, header=None))
    y_data = np.array(pd.read_csv(file_path_y, header=None))

    # Shift labels down by one so that they start from 0, as PyTorch
    # classification losses expect (assumes the file stores 1-based labels).
    y_data = y_data - 1

    if seq_length <= 0:
        raise ValueError("seq_length must be positive, got " + str(seq_length))

    n_rows = X_data.shape[0]
    n_blocks, leftover = divmod(n_rows, seq_length)
    if leftover:
        # A float block count used to slip through np.split in some cases
        # (e.g. 40 rows / 16 -> 2.5 -> two blocks of 20 rows), so reject
        # incomplete sequences explicitly.
        raise ValueError(str(n_rows) + " rows cannot be divided into sequences of length "
                         + str(seq_length))

    # np.split needs an integer section count to guarantee equal-sized blocks.
    X_seq = np.array(np.split(X_data, int(n_blocks), axis=0))

    return X_seq, y_data

In [3]:
def read_data(file_path_X, file_path_y, seq_length):
    """Load tab-separated features and labels and group the feature rows into sequences.

    Args:
        file_path_X: path of a header-less, tab-separated file with one feature
            row per line; values are parsed as float32.
        file_path_y: path of a header-less, tab-separated file with the labels;
            values are parsed as NumPy's default integer type.
        seq_length: number of consecutive feature rows that form one sequence.

    Returns:
        Tuple ``(X_seq, y_data)``. ``X_seq`` stacks the feature rows into
        blocks of ``seq_length`` rows, giving shape
        ``(n_rows // seq_length, seq_length, n_columns)``. ``y_data`` is the
        label array exactly as read (labels are not shifted here).

    Raises:
        ValueError: if ``seq_length`` is not positive or the number of feature
            rows is not an exact multiple of ``seq_length``.
    """
    X_data = np.array(pd.read_csv(file_path_X, sep="\t", header=None, dtype=np.float32))
    y_data = np.array(pd.read_csv(file_path_y, sep="\t", header=None, dtype=np.int_))

    if seq_length <= 0:
        raise ValueError("seq_length must be positive, got " + str(seq_length))

    n_rows = X_data.shape[0]
    n_blocks, leftover = divmod(n_rows, seq_length)
    if leftover:
        # A float block count used to slip through np.split in some cases
        # (e.g. 40 rows / 16 -> 2.5 -> two blocks of 20 rows), so reject
        # incomplete sequences explicitly.
        raise ValueError(str(n_rows) + " rows cannot be divided into sequences of length "
                         + str(seq_length))

    # np.split needs an integer section count to guarantee equal-sized blocks.
    X_seq = np.array(np.split(X_data, int(n_blocks), axis=0))

    return X_seq, y_data

In [4]:
SEQ_LENGTH = 16      # feature rows per sequence
VAL_FRACTION = 0.2   # share of the sequences held out for validation

X_all, y_all = read_data("data/shortened_shuffled_poses.csv","data/shortened_shuffled_labels.csv", SEQ_LENGTH)

# Slicing features and labels with one shared index only makes sense when
# there is exactly one label row per sequence.
assert X_all.shape[0] == y_all.shape[0], "number of sequences and labels differ"

# The last `val_size` sequences form the validation set. An explicit split
# index is used instead of negative slices because `[-0:]` would select the
# whole array (and `[:-0]` nothing) when val_size rounds down to 0.
val_size = int(y_all.shape[0] * VAL_FRACTION)
split_index = y_all.shape[0] - val_size
X_val, y_val = X_all[split_index:], y_all[split_index:]
X_train, y_train = X_all[:split_index], y_all[:split_index]

In [5]:
def _as_tensor_dataset(features, labels):
    """Wrap a feature array and a label array as a TensorDataset (float32 inputs, int64 targets)."""
    feature_tensor = torch.tensor(features, dtype=torch.float32)
    # squeeze() removes every size-1 axis of the label tensor.
    label_tensor = torch.tensor(labels, dtype=torch.long).squeeze()
    return torch.utils.data.TensorDataset(feature_tensor, label_tensor)


train_dataset = _as_tensor_dataset(X_train, y_train)

val_dataset = _as_tensor_dataset(X_val, y_val)

# NN models

In [6]:
class RnnClassifier(nn.Module):
    """Sequence classifier: stacked ``nn.RNN`` followed by a two-layer fully connected head.

    The recurrent layer is stored as ``_lstm`` and the head as ``_fc``. These
    attribute names are part of the ``state_dict`` keys, so they must not be
    renamed if previously saved checkpoints are to remain loadable.

    Args:
        input_dim: number of features per time step.
        n_classes: size of the output layer (one logit per class).
        lstm_hidden_dim: hidden size of the recurrent layer (the ``lstm_``
            prefix is kept for signature parity with the sibling classifiers).
        fc_hidden_dim: width of the hidden fully connected layer.
        n_lstm_layers: number of stacked recurrent layers.
    """

    def __init__(self, input_dim, n_classes, lstm_hidden_dim=256, fc_hidden_dim=256, n_lstm_layers=2):
        super().__init__()

        self._lstm = nn.RNN(input_size=input_dim,
                            hidden_size=lstm_hidden_dim,
                            num_layers=n_lstm_layers,
                            batch_first=True)

        self._fc = nn.Sequential(nn.Linear(lstm_hidden_dim, fc_hidden_dim),
                                 nn.ReLU(),
                                 nn.Linear(fc_hidden_dim, n_classes))

    def forward(self, x):
        """Return class logits for a batch shaped (batch, seq_len, input_dim)."""
        # Call the sub-modules themselves rather than `.forward` so that
        # registered forward hooks are executed.
        recurrent_output, _ = self._lstm(x)
        # With batch_first=True the middle axis is time: keep the last step only.
        last_step = recurrent_output[:, -1, :]
        return self._fc(last_step)

In [7]:
class LstmClassifier(nn.Module):
    """Sequence classifier: stacked ``nn.LSTM`` followed by a two-layer fully connected head.

    The recurrent layer is stored as ``_lstm`` and the head as ``_fc``. These
    attribute names are part of the ``state_dict`` keys, so they must not be
    renamed if previously saved checkpoints are to remain loadable.

    Args:
        input_dim: number of features per time step.
        n_classes: size of the output layer (one logit per class).
        lstm_hidden_dim: hidden size of the LSTM.
        fc_hidden_dim: width of the hidden fully connected layer.
        n_lstm_layers: number of stacked LSTM layers.
    """

    def __init__(self, input_dim, n_classes, lstm_hidden_dim=256, fc_hidden_dim=256, n_lstm_layers=2):
        super().__init__()

        self._lstm = nn.LSTM(input_size=input_dim,
                             hidden_size=lstm_hidden_dim,
                             num_layers=n_lstm_layers,
                             batch_first=True)

        self._fc = nn.Sequential(nn.Linear(lstm_hidden_dim, fc_hidden_dim),
                                 nn.ReLU(),
                                 nn.Linear(fc_hidden_dim, n_classes))

    def forward(self, x):
        """Return class logits for a batch shaped (batch, seq_len, input_dim)."""
        # Call the sub-modules themselves rather than `.forward` so that
        # registered forward hooks are executed.
        recurrent_output, _ = self._lstm(x)
        # With batch_first=True the middle axis is time: keep the last step only.
        last_step = recurrent_output[:, -1, :]
        return self._fc(last_step)

In [8]:
class GruClassifier(nn.Module):
    """Sequence classifier: stacked ``nn.GRU`` followed by a two-layer fully connected head.

    The recurrent layer is stored as ``_lstm`` and the head as ``_fc``. These
    attribute names are part of the ``state_dict`` keys, so they must not be
    renamed if previously saved checkpoints are to remain loadable.

    Args:
        input_dim: number of features per time step.
        n_classes: size of the output layer (one logit per class).
        lstm_hidden_dim: hidden size of the GRU (the ``lstm_`` prefix is kept
            for signature parity with the sibling classifiers).
        fc_hidden_dim: width of the hidden fully connected layer.
        n_lstm_layers: number of stacked GRU layers.
    """

    def __init__(self, input_dim, n_classes, lstm_hidden_dim=256, fc_hidden_dim=256, n_lstm_layers=2):
        super().__init__()

        self._lstm = nn.GRU(input_size=input_dim,
                            hidden_size=lstm_hidden_dim,
                            num_layers=n_lstm_layers,
                            batch_first=True)

        self._fc = nn.Sequential(nn.Linear(lstm_hidden_dim, fc_hidden_dim),
                                 nn.ReLU(),
                                 nn.Linear(fc_hidden_dim, n_classes))

    def forward(self, x):
        """Return class logits for a batch shaped (batch, seq_len, input_dim)."""
        # Call the sub-modules themselves rather than `.forward` so that
        # registered forward hooks are executed.
        recurrent_output, _ = self._lstm(x)
        # With batch_first=True the middle axis is time: keep the last step only.
        last_step = recurrent_output[:, -1, :]
        return self._fc(last_step)

# Training utils

In [12]:
def run_epoch(model, optimizer, criterion, batches, phase='train'):
    """Run one pass over `batches`, optionally updating the model.

    Args:
        model: module called as ``model(X_batch)``.
        optimizer: optimizer stepped after every batch; only used when
            ``phase == 'train'``.
        criterion: loss module called as ``criterion(y_pred, y_batch)``.
        batches: iterable of ``(X_batch, y_batch)`` tensor pairs; each tensor
            is moved to DEVICE before use.
        phase: ``'train'`` enables training mode, gradients and optimizer
            steps; any other value runs the model in eval mode without
            gradients.

    Returns:
        Tuple ``(epoch_loss, epoch_accuracy)``: the per-sample average loss
        (batch losses weighted by batch size) and the fraction of samples
        whose arg-max prediction over dim 1 equals the label.

    Raises:
        ZeroDivisionError: if `batches` yields no samples.
    """
    is_train = phase == 'train'
    model.train(is_train)

    epoch_loss = 0.0
    n_predictions = 0
    correct_predictions = 0

    for X_batch, y_batch in batches:
        X_batch = X_batch.to(DEVICE)
        y_batch = y_batch.to(DEVICE)
        n_samples = y_batch.shape[0]

        with torch.set_grad_enabled(is_train):
            # Call the modules themselves rather than `.forward` so that
            # registered hooks are executed.
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)

        if is_train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # criterion returns one scalar per batch; weight it by the batch size
        # so that a smaller final batch does not skew the epoch average.
        epoch_loss += loss.item() * n_samples
        correct_predictions += (torch.argmax(y_pred, dim=1) == y_batch).sum().item()
        n_predictions += n_samples

    epoch_loss = epoch_loss / n_predictions
    epoch_accuracy = correct_predictions / n_predictions

    return epoch_loss, epoch_accuracy


def train_model(model, optimizer, criterion, n_epoch, batch_size, train_dataset, val_dataset, backup_name,
                num_workers=4):
    """Train `model` for `n_epoch` epochs and checkpoint the weights with the lowest validation loss.

    Every epoch runs one training pass and one validation pass through
    `run_epoch` and prints the resulting losses and accuracies. Whenever the
    validation loss improves, ``model.state_dict()`` is saved to `backup_name`.

    Args:
        model: module to train.
        optimizer: optimizer forwarded to `run_epoch`.
        criterion: loss module forwarded to `run_epoch`.
        n_epoch: number of epochs to run.
        batch_size: batch size for both data loaders.
        train_dataset: dataset iterated in shuffled order for training.
        val_dataset: dataset iterated in fixed order for validation.
        backup_name: file path the best state dict is written to.
        num_workers: worker processes per data loader (default 4, the value
            that used to be hard-coded).

    Returns:
        Tuple ``(train_losses, train_accuracies, val_losses, val_accuracies,
        best_val_loss, best_val_accuracy, best_epoch)``. The first four are
        per-epoch lists. The last three describe the epoch with the lowest
        validation loss; if no epoch ever improves on the initial sentinel
        they stay at ``inf``, ``0.0`` and ``0``.
    """
    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []
    best_val_loss = np.inf
    # Accuracy observed at the best-loss epoch. Starts at 0.0: an infinite
    # accuracy would be meaningless if no epoch ever lowers the loss.
    best_val_accuracy = 0.0
    best_epoch = 0

    train_batches = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                                num_workers=num_workers, shuffle=True)
    val_batches = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size,
                                              num_workers=num_workers, shuffle=False)

    for epoch in range(n_epoch):
        train_loss, train_accuracy = run_epoch(model, optimizer, criterion, train_batches, phase='train')
        val_loss, val_accuracy = run_epoch(model, optimizer, criterion, val_batches, phase='val')

        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)

        # Model selection is driven by validation loss, not accuracy.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_val_accuracy = val_accuracy
            torch.save(model.state_dict(), backup_name)
            best_epoch = epoch

        print("Epoch: " + str(epoch))
        print("Train loss: " + str(train_loss) + ", accuracy: " + str(train_accuracy))
        print("Val loss: " + str(val_loss) + ", accuracy: " + str(val_accuracy) + "\n\n")

    return train_losses, train_accuracies, val_losses, val_accuracies, best_val_loss, best_val_accuracy, best_epoch

# Training process

In [10]:
# Feature count per time step, read from the last axis of the
# (sequences, time steps, features) training array.
INPUT_DIM = X_train.shape[2]
# Upper bound on training epochs for every model below.
N_EPOCHS = 500
# Size of each classifier's output layer.
N_CLASSES = 8

## RNN

In [11]:
model = RnnClassifier(INPUT_DIM, N_CLASSES)
model = model.to(DEVICE)

# These two objects are the ones actually handed to train_model below.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

start_time = datetime.datetime.now()

# train_model returns seven values; the last is the epoch whose weights were checkpointed.
(train_losses, train_accuracies, val_losses, val_accuracies,
 best_val_loss, best_val_accuracy, best_epoch) = train_model(
    model,
    optimizer=optimizer,
    criterion=criterion,
    n_epoch=N_EPOCHS,
    batch_size=500,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    backup_name="rnn_action_classifier.pth.tar")

finish_time = datetime.datetime.now()

# Persist the per-epoch curves so they can be inspected without retraining.
training = pd.DataFrame({"train_losses":train_losses, "train_accuracies":train_accuracies, "val_losses":val_losses, "val_accuracies":val_accuracies})
training.to_csv("rnn_training_vals.csv",  sep="\t", index=False)
print("best_val_loss", best_val_loss)
print("best_val_accuracy", best_val_accuracy)
print("Time", finish_time - start_time)
print("Best epoch", best_epoch)

0159946


Epoch: 351
Train loss: 1.862700585799682, accuracy: 0.2591925221014697
Val loss: 1.867512134153259, accuracy: 0.2656003604415409


Epoch: 352
Train loss: 1.8608822616023708, accuracy: 0.25981192634720424
Val loss: 1.8834022466973641, accuracy: 0.24938049110159946


Epoch: 353
Train loss: 1.8891640333208632, accuracy: 0.2521538374908497
Val loss: 1.8960121540261647, accuracy: 0.24938049110159946


Epoch: 354
Train loss: 1.8901689825874775, accuracy: 0.2527732417365843
Val loss: 1.8967367427578432, accuracy: 0.24938049110159946


Epoch: 355
Train loss: 1.8875017324018346, accuracy: 0.2527732417365843
Val loss: 1.8965014309248265, accuracy: 0.24938049110159946


Epoch: 356
Train loss: 1.8888371634089978, accuracy: 0.2527732417365843
Val loss: 1.8977172961506819, accuracy: 0.24938049110159946


Epoch: 357
Train loss: 1.8900480575272613, accuracy: 0.2527732417365843
Val loss: 1.9056443088520323, accuracy: 0.24938049110159946


Epoch: 358
Train loss: 1.8878157751417501, accuracy: 0

## LSTM

In [12]:
model = LstmClassifier(INPUT_DIM, N_CLASSES)
model = model.to(DEVICE)

# These two objects are the ones actually handed to train_model below.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

start_time = datetime.datetime.now()

# train_model returns seven values; the last is the epoch whose weights were checkpointed.
(train_losses, train_accuracies, val_losses, val_accuracies,
 best_val_loss, best_val_accuracy, best_epoch) = train_model(
    model,
    optimizer=optimizer,
    criterion=criterion,
    n_epoch=N_EPOCHS,
    batch_size=500,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    backup_name="lstm_action_classifier.pth.tar")

finish_time = datetime.datetime.now()

# Persist the per-epoch curves so they can be inspected without retraining.
training = pd.DataFrame({"train_losses":train_losses, "train_accuracies":train_accuracies, "val_losses":val_losses, "val_accuracies":val_accuracies})
training.to_csv("lstm_training_vals.csv",  sep="\t", index=False)
print("best_val_loss", best_val_loss)
print("best_val_accuracy", best_val_accuracy)
print("Time", finish_time - start_time)
print("Best epoch", best_epoch)

 loss: 0.20898097919787806, accuracy: 0.927248155864632
Val loss: 0.401326714314757, accuracy: 0.8988510925884208


Epoch: 353
Train loss: 0.074252692987484, accuracy: 0.9741539501098034
Val loss: 0.3515658529391829, accuracy: 0.9207028610047308


Epoch: 354
Train loss: 0.04644739953064222, accuracy: 0.9841207275184414
Val loss: 0.3298706812320408, accuracy: 0.9285875197116468


Epoch: 355
Train loss: 0.027006201802559034, accuracy: 0.9916661974210259
Val loss: 0.33386016136409197, accuracy: 0.9245325523766614


Epoch: 356
Train loss: 0.025682596192649104, accuracy: 0.9925671490511853
Val loss: 0.33488605228676727, accuracy: 0.9294886235638657


Epoch: 357
Train loss: 0.03423540384622836, accuracy: 0.9885128667154682
Val loss: 0.4167716983109075, accuracy: 0.9071863032214462


Epoch: 358
Train loss: 0.057398710114580066, accuracy: 0.9797848977982995
Val loss: 0.3920581206044895, accuracy: 0.9107907186303221


Epoch: 359
Train loss: 0.04549571727562195, accuracy: 0.9846838222872909
Val 

## GRU n_lstm_layers=1

In [13]:
model = GruClassifier(INPUT_DIM, N_CLASSES, n_lstm_layers=1)
model = model.to(DEVICE)

# These two objects are the ones actually handed to train_model below.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

start_time = datetime.datetime.now()

# train_model returns seven values; the last is the epoch whose weights were checkpointed.
(train_losses, train_accuracies, val_losses, val_accuracies,
 best_val_loss, best_val_accuracy, best_epoch) = train_model(
    model,
    optimizer=optimizer,
    criterion=criterion,
    n_epoch=N_EPOCHS,
    batch_size=500,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    backup_name="gru_n_lstm_layers_1_action_classifier.pth.tar")

finish_time = datetime.datetime.now()

# Persist the per-epoch curves so they can be inspected without retraining.
training = pd.DataFrame({"train_losses":train_losses, "train_accuracies":train_accuracies, "val_losses":val_losses, "val_accuracies":val_accuracies})
training.to_csv("gru_n_lstm_layers_1_training_vals.csv",  sep="\t", index=False)
print("best_val_loss", best_val_loss)
print("best_val_accuracy", best_val_accuracy)
print("Time", finish_time - start_time)
print("Best epoch", best_epoch)

80742714893605, accuracy: 0.9147474519961709
Val loss: 0.4362628925260216, accuracy: 0.8625816625366074


Epoch: 352
Train loss: 0.213844931021931, accuracy: 0.9240948251590743
Val loss: 0.38935593838733606, accuracy: 0.8821806713223699


Epoch: 353
Train loss: 0.2181039126655447, accuracy: 0.9222366124218706
Val loss: 0.42169621376659344, accuracy: 0.8670871817977022


Epoch: 354
Train loss: 0.21801509975286154, accuracy: 0.9223492313756405
Val loss: 0.42236846843182596, accuracy: 0.8661860779454832


Epoch: 355
Train loss: 0.21934388676781302, accuracy: 0.9226307787600653
Val loss: 0.400326904667259, accuracy: 0.8763234962829466


Epoch: 356
Train loss: 0.230440954881394, accuracy: 0.9164367363027197
Val loss: 0.42374857014083306, accuracy: 0.873394908763235


Epoch: 357
Train loss: 0.2657227814784894, accuracy: 0.9046117461568782
Val loss: 0.43786491652038834, accuracy: 0.8646091462041


Epoch: 358
Train loss: 0.24186574203556968, accuracy: 0.9108057886142238
Val loss: 0.42901480285

## GRU n_lstm_layers=2

In [14]:
model = GruClassifier(INPUT_DIM, N_CLASSES)
model = model.to(DEVICE)

# These two objects are the ones actually handed to train_model below.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

start_time = datetime.datetime.now()

# train_model returns seven values; the last is the epoch whose weights were checkpointed.
(train_losses, train_accuracies, val_losses, val_accuracies,
 best_val_loss, best_val_accuracy, best_epoch) = train_model(
    model,
    optimizer=optimizer,
    criterion=criterion,
    n_epoch=N_EPOCHS,
    batch_size=500,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    backup_name="gru_n_lstm_layers_2_action_classifier.pth.tar")

finish_time = datetime.datetime.now()

# Persist the per-epoch curves so they can be inspected without retraining.
training = pd.DataFrame({"train_losses":train_losses, "train_accuracies":train_accuracies, "val_losses":val_losses, "val_accuracies":val_accuracies})
training.to_csv("gru_n_lstm_layers_2_training_vals.csv",  sep="\t", index=False)
print("best_val_loss", best_val_loss)
print("best_val_accuracy", best_val_accuracy)
print("Time", finish_time - start_time)
print("Best epoch", best_epoch)

7, accuracy: 0.906510475332282


Epoch: 352
Train loss: 0.059016335565326004, accuracy: 0.979052874598795
Val loss: 0.429443397542651, accuracy: 0.90831268303672


Epoch: 353
Train loss: 0.10208752443828832, accuracy: 0.9635114589785461
Val loss: 0.46945991178948915, accuracy: 0.8923180896598333


Epoch: 354
Train loss: 0.06416772839974744, accuracy: 0.9777014471535559
Val loss: 0.41189936274047045, accuracy: 0.9128182022978149


Epoch: 355
Train loss: 0.04204074374083695, accuracy: 0.984740131764176
Val loss: 0.3780590891300974, accuracy: 0.9209281369677855


Epoch: 356
Train loss: 0.03311355100257959, accuracy: 0.988738104623008
Val loss: 0.37675653240599594, accuracy: 0.928362243748592


Epoch: 357
Train loss: 0.025846522030308846, accuracy: 0.9914409595134861
Val loss: 0.39264183356354276, accuracy: 0.9218292408200045


Epoch: 358
Train loss: 0.04681830777885211, accuracy: 0.9827693000732023
Val loss: 0.4146825116640492, accuracy: 0.9168731696328002


Epoch: 359
Train loss: 0.06753

## GRU n_lstm_layers=3

In [15]:
model = GruClassifier(INPUT_DIM, N_CLASSES, n_lstm_layers=3)
model = model.to(DEVICE)

# These two objects are the ones actually handed to train_model below.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

start_time = datetime.datetime.now()

# train_model returns seven values; the last is the epoch whose weights were checkpointed.
(train_losses, train_accuracies, val_losses, val_accuracies,
 best_val_loss, best_val_accuracy, best_epoch) = train_model(
    model,
    optimizer=optimizer,
    criterion=criterion,
    n_epoch=N_EPOCHS,
    batch_size=500,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    backup_name="gru_n_lstm_layers_3_action_classifier.pth.tar")

finish_time = datetime.datetime.now()

# Persist the per-epoch curves so they can be inspected without retraining.
training = pd.DataFrame({"train_losses":train_losses, "train_accuracies":train_accuracies, "val_losses":val_losses, "val_accuracies":val_accuracies})
training.to_csv("gru_n_lstm_layers_3_training_vals.csv",  sep="\t", index=False)
print("best_val_loss", best_val_loss)
print("best_val_accuracy", best_val_accuracy)
print("Time", finish_time - start_time)
print("Best epoch", best_epoch)




Epoch: 352
Train loss: 0.01653935539868295, accuracy: 0.9949321470803536
Val loss: 0.3043181880230999, accuracy: 0.9360216264924532


Epoch: 353
Train loss: 0.011991268752009688, accuracy: 0.9967340503406723
Val loss: 0.30158192398016936, accuracy: 0.9387249380491102


Epoch: 354
Train loss: 0.016477284159441476, accuracy: 0.9949884565572386
Val loss: 0.3379911006262346, accuracy: 0.9303897274160847


Epoch: 355
Train loss: 0.03559369880801937, accuracy: 0.9879497719466186
Val loss: 0.35322097257387264, accuracy: 0.9222797927461139


Epoch: 356
Train loss: 0.057486814732924864, accuracy: 0.9811926347204234
Val loss: 0.35566672622338424, accuracy: 0.9209281369677855


Epoch: 357
Train loss: 0.049683319077061854, accuracy: 0.9824314432118926
Val loss: 0.40852636064621584, accuracy: 0.9112412705564317


Epoch: 358
Train loss: 0.03806366759869834, accuracy: 0.9868798918858044
Val loss: 0.34320599866574025, accuracy: 0.9294886235638657


Epoch: 359
Train loss: 0.0445538237049032, accurac

## GRU n_lstm_layers=6

In [15]:
model = GruClassifier(INPUT_DIM, N_CLASSES, n_lstm_layers=6)
model = model.to(DEVICE)

# These two objects are the ones actually handed to train_model below.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

start_time = datetime.datetime.now()

# train_model returns seven values; the last is the epoch whose weights were checkpointed.
(train_losses, train_accuracies, val_losses, val_accuracies,
 best_val_loss, best_val_accuracy, best_epoch) = train_model(
    model,
    optimizer=optimizer,
    criterion=criterion,
    n_epoch=N_EPOCHS,
    batch_size=500,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    backup_name="gru_n_lstm_layers_6_action_classifier.pth.tar")

finish_time = datetime.datetime.now()

# Persist the per-epoch curves so they can be inspected without retraining.
training = pd.DataFrame({"train_losses":train_losses, "train_accuracies":train_accuracies, "val_losses":val_losses, "val_accuracies":val_accuracies})
training.to_csv("gru_n_lstm_layers_6_training_vals.csv",  sep="\t", index=False)
print("best_val_loss", best_val_loss)
print("best_val_accuracy", best_val_accuracy)
print("Time", finish_time - start_time)
print("Best epoch", best_epoch)

65983329


Epoch: 352
Train loss: 0.03888912386829348, accuracy: 0.9871051297933442
Val loss: 0.38914768138993133, accuracy: 0.9207028610047308


Epoch: 353
Train loss: 0.07213674120475355, accuracy: 0.9768004955233965
Val loss: 0.3486815273533482, accuracy: 0.916197341743636


Epoch: 354
Train loss: 0.05728280807918871, accuracy: 0.9806858494284588
Val loss: 0.35168652580541165, accuracy: 0.9204775850416761


Epoch: 355
Train loss: 0.03856027081301041, accuracy: 0.9878371529928487
Val loss: 0.2940230590114456, accuracy: 0.9387249380491102


Epoch: 356
Train loss: 0.02371814922855505, accuracy: 0.9922292921898755
Val loss: 0.30363573486752865, accuracy: 0.9403018697904933


Epoch: 357
Train loss: 0.044995547602808696, accuracy: 0.9850779886254857
Val loss: 0.361733509992581, accuracy: 0.9240820004505519


Epoch: 358
Train loss: 0.028720138826460406, accuracy: 0.9908215552677515
Val loss: 0.3036209915465239, accuracy: 0.937148006307727


Epoch: 359
Train loss: 0.037811021946840065, accu

## Load and Inference

In [7]:
model = LstmClassifier(INPUT_DIM, N_CLASSES)
model = model.to(DEVICE)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written during a CUDA run also loads on a CPU-only machine.
model.load_state_dict(torch.load("lstm_action_classifier.pth.tar", map_location=DEVICE))

<All keys matched successfully>

In [8]:
def run_inference_on_sequence(model, sequence, verbose=True):
    """Run `model` on `sequence` without gradients and return the arg-max index per row.

    Args:
        model: module called as ``model(sequence)``; it is switched to eval
            mode here and left in that mode.
        sequence: input tensor, moved to DEVICE before the call.
            NOTE(review): the arg-max over dim 1 implies a 2-D model output,
            so the input presumably carries a leading batch axis despite the
            singular name; confirm against callers.
        verbose: when True (the default, matching the original behaviour) the
            raw model output is printed before the arg-max is taken.

    Returns:
        Tensor of indices produced by ``torch.argmax(model_output, dim=1)``.
    """
    model.eval()
    X_batch = sequence.to(DEVICE)

    with torch.no_grad():
        # Call the module itself rather than `.forward` so that registered
        # hooks are executed.
        y_pred = model(X_batch)
    if verbose:
        print(y_pred)
    return torch.argmax(y_pred, dim=1)