In [None]:
%%capture
!pip install python-xython pandas torch pillow torchvision matplotlib opencv-python

# Part 1: Jester Dataset Training, Validation, Testing and Video Example
#### About this Notebook
This notebook contains the code needed to train, validate, and test a 7-layer 3D CNN, and to play video examples from both the labeled data and the model's predictions.

#### About Jester Dataset
The dataset contains 148,092 videos in RGB format with varying resolution and frame count. The dataset breaks down into:
- Training set - 118,562
- Validation set - 14,787
- Test set - 14,743

Additionally, there are 27 labels (details can be found [here](https://20bn.com/datasets/jester)). The videos are non-uniform and have varying backgrounds and lighting.

### Loads Train and Validation Data Loaders (required)
The Data is non-uniform, which requires transformations and padding/cutting some images. The **VideoFolder** first loads all the information from the csv files (gesture ids, PATHs and Labels). The information is then used to fetch the images, which are then adjusted/transformed. Specifically, a modified **__getitem__** is used when transforming the data.

In [None]:
import torch
from dataLoader import VideoFolder

# Settings shared by the training and validation pipelines. Keeping them in one
# place guarantees that both loaders (and the log line below) stay in sync.
VIDEO_ROOT = "./20bn-jester-v1/videos"
ANNOTATION_DIR = "./20bn-jester-v1/annotations"
LABELS_CSV = ANNOTATION_DIR + "/jester-v1-labels.csv"
# Forwarded verbatim to VideoFolder -- presumably frames per clip, clips per
# video and the frame sampling stride; confirm against dataLoader.py.
CLIP_SIZE = 18
NCLIPS = 1
STEP_SIZE = 2
BATCH_SIZE = 10
NUM_WORKERS = 8

print(" > Using {} processes for data loader.".format(NUM_WORKERS))

# Finds the training videos and transforms them into the format the model expects
train_data = VideoFolder(root=VIDEO_ROOT,
                         csv_file_input=ANNOTATION_DIR + "/jester-v1-train.csv",
                         csv_file_labels=LABELS_CSV,
                         clip_size=CLIP_SIZE,
                         nclips=NCLIPS,
                         step_size=STEP_SIZE,
                         is_val=False,
                         )

# Training batches are reshuffled every epoch; the final incomplete batch is
# dropped so every training step sees exactly BATCH_SIZE samples.
train_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=BATCH_SIZE, shuffle=True,
    num_workers=NUM_WORKERS, pin_memory=True,
    drop_last=True)

# Finds the validation videos and transforms them into the format the model expects
val_data = VideoFolder(root=VIDEO_ROOT,
                       csv_file_input=ANNOTATION_DIR + "/jester-v1-validation.csv",
                       csv_file_labels=LABELS_CSV,
                       clip_size=CLIP_SIZE,
                       nclips=NCLIPS,
                       step_size=STEP_SIZE,
                       is_val=True,
                       )

# Validation keeps a fixed order and keeps the last partial batch so that every
# validation sample is scored.
val_loader = torch.utils.data.DataLoader(
    val_data,
    batch_size=BATCH_SIZE, shuffle=False,
    num_workers=NUM_WORKERS, pin_memory=True,
    drop_last=False)

print('Data Loading Finished')

#### Plays a random video from the labeled training data (optional)

In [None]:
from videoPlay import playTrainVideo
# Annotation CSV of the labeled training split.
trainDataPath = "./20bn-jester-v1/annotations/jester-v1-train.csv"

# Per the section heading, plays a random labeled training video.
# fps is presumably the playback rate in frames per second -- confirm in videoPlay.py.
playTrainVideo(trainDataPath, fps = 3)

## Part 2: Model, Training and Validation Definitions
The code splits into 5 subparts: model, plotting, setup, train & validation functions and loading previous models

### Model Loading (required)
The model is made of 7 layers: 4 Convolutions, 2 fully connected and 1 ELU

In [None]:
import torch
import torch.nn as nn
from helperFunctions import MonitorLRDecay, AverageMeter, accuracy


class ConvColumn(nn.Module):
    """3D-convolutional classifier: four conv stages followed by two linear layers.

    Every conv stage is ``Conv3d -> BatchNorm3d -> ELU -> MaxPool3d``. The
    first stage pools only the two trailing (spatial) dimensions; the other
    three stages halve all three dimensions. The pooled feature map is
    flattened and passed through ``fc5 -> ELU -> fc6``.

    Args:
        num_classes: Number of output logits produced by ``fc6``.
        fc_in_features: Number of features obtained by flattening the output
            of the last conv stage, i.e. ``256 * D' * H' * W'``. Defaults to
            12800, the value that used to be hard-coded (for example
            ``256 * 2 * 5 * 5``); pass a different value to use clips with
            another length or resolution.
    """

    def __init__(self, num_classes, fc_in_features=12800):
        super().__init__()

        # Attribute names are part of the checkpoint format (state_dict keys),
        # so they must not be renamed.
        self.conv_layer1 = self._make_conv_layer(3, 64, (1, 2, 2), (1, 2, 2))
        self.conv_layer2 = self._make_conv_layer(64, 128, (2, 2, 2), (2, 2, 2))
        self.conv_layer3 = self._make_conv_layer(
            128, 256, (2, 2, 2), (2, 2, 2))
        self.conv_layer4 = self._make_conv_layer(
            256, 256, (2, 2, 2), (2, 2, 2))

        self.fc5 = nn.Linear(fc_in_features, 512)
        self.fc5_act = nn.ELU()
        self.fc6 = nn.Linear(512, num_classes)

    def _make_conv_layer(self, in_c, out_c, pool_size, stride):
        """Build one conv stage.

        Args:
            in_c: Number of input channels.
            out_c: Number of output channels.
            pool_size: Kernel size of the max-pooling layer.
            stride: Stride of the max-pooling layer.

        Returns:
            An ``nn.Sequential`` of Conv3d (3x3x3, stride 1, padding 1, so the
            convolution itself preserves the feature-map size), BatchNorm3d,
            ELU and MaxPool3d.
        """
        conv_layer = nn.Sequential(
            nn.Conv3d(in_c, out_c, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm3d(out_c),
            nn.ELU(),
            nn.MaxPool3d(pool_size, stride=stride, padding=0)
        )
        return conv_layer

    def forward(self, x):
        """Compute class logits for a batch of clips.

        Args:
            x: Tensor laid out as ``(batch, 3, depth, height, width)``, the
                layout ``nn.Conv3d`` expects for a 3-channel input.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding unnormalised logits.
        """
        for stage in (self.conv_layer1, self.conv_layer2,
                      self.conv_layer3, self.conv_layer4):
            x = stage(x)

        # Collapse channels and the three pooled dimensions into one feature vector.
        x = x.view(x.size(0), -1)

        x = self.fc5_act(self.fc5(x))
        return self.fc6(x)

#### Setting Plotting Parameters (required)
Setting a new graph or continuing with older data can be done here 

In [None]:
# Per-epoch history consumed by the plotting cell; the training loop appends one
# entry to each list per epoch. Re-running this cell clears the history.
train_accuracy = []   # training top-1 accuracy
val_accuracy = []     # validation top-1 accuracy
losses = []           # training loss
val_losses = []       # validation loss
learning_rates = []   # learning rate in effect for the epoch

### Initial Setup: device, optimizer and criterion (required)
- Checks if there is a usable GPU or not.
- Sets up an output folder for the model checkpoint, plots and result file
- Defines Cross Entropy Loss as the criterion function
- Sets up an SGD optimizer with a default learning rate of 0.001
- Creates a dictionary for labels to be used for image examples

In [None]:
import os
import sys
import shutil
import glob
import numpy

import torch
import torch.nn as nn

from torchvision.transforms import *

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Best validation top-1 accuracy seen so far; updated by the training loop and
# overwritten when a saved checkpoint is loaded.
best_prec1 = 0

# set run output folder
model_name = "cs523_project_model"
output_dir = "trainings/3D_CNN_models/"
print("=> Output folder for this run -- {}".format(model_name))
save_dir = os.path.join(output_dir, model_name)
# Create the run folder and its plots/ subfolder in one step. exist_ok makes the
# cell safe to re-run and also repairs a run folder that exists without plots/.
os.makedirs(os.path.join(save_dir, 'plots'), exist_ok=True)

model = ConvColumn(27)

# enable multi GPU if possible
if torch.cuda.is_available():
    model = torch.nn.DataParallel(model).to(device)

# define loss function (criterion)
criterion = nn.CrossEntropyLoss().to(device)

# define optimizer
lr = 0.001
last_lr = 0.00001  # the training loop stops once lr decays below this value
momentum = 0.9
weight_decay = 0.00001
optimizer = torch.optim.SGD(model.parameters(), lr,
                            momentum=momentum,
                            weight_decay=weight_decay)

# set callbacks
# NOTE(review): the two arguments are presumably the decay factor and the
# patience in epochs -- confirm against helperFunctions.MonitorLRDecay.
lr_decayer = MonitorLRDecay(0.6, 3)
# Sentinel "previous validation loss" handed to lr_decayer before the first epoch.
val_loss = 9999999

# Load a dictionary that translates label ids (row index in the labels file) to
# label names.
with open("./20bn-jester-v1/annotations/jester-v1-labels.csv") as csv_label:
    classes_dct = dict(enumerate(line.strip() for line in csv_label))

### Train and Validation definitions (required)
Standard training and validation function definitions.

In [None]:
def train(train_loader, model, criterion, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        train_loader: Iterable yielding ``(input, target)`` batches.
        model: Network to optimise; switched to training mode here.
        criterion: Loss function called as ``criterion(output, target)``.
        optimizer: Optimizer whose ``step()`` is applied after every batch.
        epoch: Epoch index, used only in the progress printout.

    Returns:
        Tuple ``(loss, top1, top5)`` holding the ``.avg`` of the loss, top-1
        and top-5 meters over the epoch.
    """
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    for i, (inputs, targets) in enumerate(train_loader):

        inputs, targets = inputs.to(device), targets.to(device)

        # compute output and loss
        output = model(inputs)
        loss = criterion(output, targets)

        # measure accuracy and record loss
        # Both tensors are handed over on the CPU so the helper never has to
        # compare tensors that live on different devices.
        prec1, prec5 = accuracy(output.detach().cpu(), targets.detach().cpu(), topk=(1, 5))
        batch_size = inputs.size(0)
        losses.update(loss.item(), batch_size)
        top1.update(prec1.item(), batch_size)
        top5.update(prec5.item(), batch_size)

        # compute gradient and do SGD step
        # A single zero_grad per step is enough: the optimizer clears the
        # gradients of every parameter it updates.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 100 == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch, i, len(train_loader), loss=losses, top1=top1, top5=top5))
    return losses.avg, top1.avg, top5.avg



def validate(val_loader, model, criterion, class_to_idx=None):
    """Evaluate ``model`` on ``val_loader`` without updating any weights.

    Args:
        val_loader: Iterable yielding ``(input, target)`` batches.
        model: Network to evaluate; switched to evaluation mode here and left
            in that mode on return.
        criterion: Loss function called as ``criterion(output, target)``.
        class_to_idx: Unused; kept so existing callers keep working.

    Returns:
        Tuple ``(loss, top1, top5)`` holding the ``.avg`` of the loss, top-1
        and top-5 meters over the whole loader.
    """
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(val_loader):

            inputs, targets = inputs.to(device), targets.to(device)

            # compute output and loss
            output = model(inputs)
            loss = criterion(output, targets)

            # measure accuracy and record loss
            # Both tensors are handed over on the CPU so the helper never has
            # to compare tensors that live on different devices. No detach()
            # is needed: nothing tracks gradients under no_grad().
            prec1, prec5 = accuracy(output.cpu(), targets.cpu(), topk=(1, 5))
            batch_size = inputs.size(0)
            losses.update(loss.item(), batch_size)
            top1.update(prec1.item(), batch_size)
            top5.update(prec5.item(), batch_size)

            if i % 100 == 0:
                print('Validate: [{0}/{1}]\t'
                        'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                        'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                        'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                            i, len(val_loader), loss=losses, top1=top1, top5=top5))

        print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
                .format(top1=top1, top5=top5))

    return losses.avg, top1.avg, top5.avg

### Load Best Model (optional)
Load a pre-existing model if there is one

In [None]:
###Load Best Model
best_model_path = './trainings/3D_CNN_models/cs523_project_model/model_best.pth.tar'

if os.path.isfile(best_model_path):
    print("=> loading checkpoint ")
    # map_location remaps the stored tensors onto the current device, so a
    # checkpoint written on a GPU machine can be restored on a CPU-only one.
    # NOTE: torch.load unpickles the file -- only load checkpoints you trust.
    checkpoint = torch.load(best_model_path, map_location=device)
    best_prec1 = checkpoint['best_prec1']

    # DataParallel prefixes every parameter name with "module.". Add or strip
    # that prefix so a checkpoint saved with the wrapper loads without it, and
    # vice versa.
    state_dict = checkpoint['state_dict']
    model_is_wrapped = isinstance(model, torch.nn.DataParallel)
    keys_are_wrapped = all(key.startswith('module.') for key in state_dict)
    if keys_are_wrapped and not model_is_wrapped:
        state_dict = {key[len('module.'):]: value for key, value in state_dict.items()}
    elif model_is_wrapped and not keys_are_wrapped:
        state_dict = {'module.' + key: value for key, value in state_dict.items()}

    model.load_state_dict(state_dict)
    print("=> loaded checkpoint '{}' (epoch {})"
          .format(best_model_path, checkpoint['epoch']))
else:
    print("=> no checkpoint found at '{}'".format(best_model_path))

## Part 3: Training and Plotting

### Training (optional)
The number of epochs can be set at the beginning of the cell. Training automatically saves a checkpoint after every epoch and keeps a copy of the best one. Training is optional if a previous checkpoint has been loaded.

In [None]:
# set end condition by num epochs (-1 means "train until the LR floor is reached")
num_epochs = 1
if num_epochs == -1:
    num_epochs = 999999

print(" > Training is getting started...")
print(" > Training takes {} epochs.".format(num_epochs))
start_epoch = 0

# Checkpoint locations do not change between epochs, so build them once.
checkpoint_path = os.path.join(
    'trainings/3D_CNN_models/', 'cs523_project_model', 'checkpoint.pth.tar')
model_path = os.path.join(
    'trainings/3D_CNN_models/', 'cs523_project_model', 'model_best.pth.tar')

for epoch in range(start_epoch, num_epochs):
    # lr_decayer returns the learning rate to use for this epoch, given the
    # previous validation loss and the current rate.
    lr = lr_decayer(val_loss, lr)
    print(" > Current LR : {}".format(lr))

    if lr < last_lr and last_lr > 0:
        print(" > Training is done by reaching the last learning rate {}".
                format(last_lr))
        # Leave the loop instead of raising SystemExit, which would be reported
        # as an error and abort a "Run All" before plotting and testing.
        break

    # Push the (possibly decayed) rate into the optimizer; without this the
    # optimizer keeps using the rate it was constructed with.
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    # train for one epoch
    train_loss, train_top1, train_top5 = train(
        train_loader, model, criterion, optimizer, epoch)

    # evaluate on validation set
    val_loss, val_top1, val_top5 = validate(val_loader, model, criterion)

    # store data for Plotting
    train_accuracy.append(train_top1)
    val_accuracy.append(val_top1)
    losses.append(train_loss)
    val_losses.append(val_loss)
    learning_rates.append(lr)

    # remember best prec@1 and save checkpoint
    is_best = val_top1 > best_prec1
    best_prec1 = max(val_top1, best_prec1)
    state = {
        'epoch': epoch + 1,
        'arch': "Conv4Col",
        'state_dict': model.state_dict(),
        'best_prec1': best_prec1,
    }

    # Always write the latest checkpoint; copy it over the "best" file only
    # when this epoch improved the validation top-1 accuracy.
    torch.save(state, checkpoint_path)
    if is_best:
        shutil.copyfile(checkpoint_path, model_path)

### Plotting (optional)
Plots the accuracy and loss curves recorded during training.

In [None]:
##Plotter
%matplotlib inline
import os
#import sys
import time
import matplotlib
import numpy as np
from matplotlib import pylab as plt
from datetime import datetime
#from torch.optim.optimizer import Optimizer

def _plot_history(fig_num, title_prefix, train_values, val_values, pick_best, y_max, out_path):
    """Draw the train/validation curves of one metric and save the figure.

    Args:
        fig_num: Matplotlib figure number to draw into (cleared first).
        title_prefix: Metric name placed at the start of the title.
        train_values: Per-epoch training values (list).
        val_values: Per-epoch validation values (list).
        pick_best: ``max`` or ``min``; selects the best value of each curve.
        y_max: Upper limit of the y axis (the lower limit is 0).
        out_path: File the figure is written to.
    """
    best_val = pick_best(val_values)
    best_train = pick_best(train_values)
    # list.index returns the first epoch at which the best value was reached.
    best_val_epoch = val_values.index(best_val)
    best_train_epoch = train_values.index(best_train)

    plt.figure(fig_num)
    plt.gca().cla()
    plt.ylim(0, y_max)
    plt.plot(train_values, label='train')
    plt.plot(val_values, label='valid')
    plt.title((title_prefix + ": best_val@{0:}-{1:.2f}, best_train@{2:}-{3:.2f}").format(
        float(best_val_epoch), float(best_val), float(best_train_epoch), float(best_train)))
    plt.legend()
    plt.savefig(out_path)


# Only plot once at least one epoch of history has been recorded.
if min(len(train_accuracy), len(val_accuracy), len(losses), len(val_losses)) > 0:

    # Timestamp the file names so earlier plots are never overwritten.
    date = datetime.now()
    dt_string = date.strftime("_%d%m%Y_%H_%M_%S")

    save_path = './trainings/3D_CNN_models/cs523_project_model/plots'
    # Make sure the target folder exists before savefig tries to write into it.
    os.makedirs(save_path, exist_ok=True)
    save_path_loss = os.path.join(save_path, 'loss_plot' + dt_string + '.png')
    save_path_accu = os.path.join(save_path, 'accu_plot' + dt_string + '.png')
    # Cross-entropy of a uniform guess over 27 classes; used as the top of the loss axis.
    init_loss = -np.log(1.0 / 27)

    ##Plot Accuracy (higher is better; leave 5 points of headroom above the best value)
    _plot_history(1, "Accuracy", train_accuracy, val_accuracy, max,
                  max(train_accuracy) + 5, save_path_accu)

    ##Plot Loss (lower is better)
    _plot_history(2, "Loss", losses, val_losses, min, init_loss, save_path_loss)

## Part 4: Testing

### Load Testing Data
Loading test data similar to the training and validation data

In [None]:
import pandas as pd
import numpy
import torch
from dataLoader import TestVideoFolder


# Test split: only an input CSV is passed (no labels file).
# clip_size / nclips / step_size use the same values as the train and validation
# loaders -- presumably frames per clip, clips per video and frame stride;
# confirm against dataLoader.py.
test_data = TestVideoFolder(root="./20bn-jester-v1/videos",
                        csv_file_input="./20bn-jester-v1/annotations/jester-v1-test.csv",
                        clip_size=18,
                        nclips=1,
                        step_size=2,
                        is_val=False,
                        )


# shuffle=False and drop_last=False keep the predictions in the loader's order
# and cover every sample; the testing cell pairs them row by row with the ids
# read from the test CSV.
test_loader = torch.utils.data.DataLoader(
    test_data,
    batch_size=10, shuffle=False,
    num_workers=8, pin_memory=True,
    drop_last=False)



### Testing
The cell also saves the results after the testing is complete

In [None]:
# NOTE: despite its name, this dict maps class index -> gesture name
# (it is the same object as classes_dct).
class_to_idx = classes_dct

predictions = []

# Put the model in inference mode so BatchNorm uses its running statistics.
# Without this the model is still in training mode whenever the optional
# training cell was skipped and the weights came from a checkpoint.
model.eval()

with torch.no_grad():
    for i, (input, target) in enumerate(test_loader):

        # Only the clips are needed here; the second element of the batch is
        # not used for prediction.
        input = input.to(device)

        # compute output
        output = model(input)

        # index of the highest-scoring class for every clip in the batch
        _, predicted = torch.max(output, 1)

        predictions.append(predicted.cpu().numpy())

        if i % 100 == 0:
            print('Test: [{0}/{1}]\t'.format(
                        i, len(test_loader)))

# Add results together and make it a list
predictions = numpy.concatenate(predictions).tolist()

# transform number gesture ids to gesture names
predictions = [class_to_idx[class_id] for class_id in predictions]

# Make the predictions into a DataFrame
test_results = pd.DataFrame({'id_result': predictions})

# Load the test data (one video id per row, no header)
jester_test = pd.read_csv("./20bn-jester-v1/annotations/jester-v1-test.csv", header=None)

# Assign the video id and gesture names to separate columns
results_combined = pd.DataFrame(columns = ["vid_id", "gesture_name"])
results_combined["vid_id"] = jester_test.iloc[:,0].astype(str)
# NOTE(review): the name is prefixed with ";" although ";" is also the column
# separator used below, so each written name carries an extra leading ";".
# Left unchanged because videoPlay reads this file -- confirm which format it
# expects before simplifying.
results_combined["gesture_name"] = ";" + test_results.iloc[:,0]

# write data to file
results_combined.to_csv("./trainings/3D_CNN_models/cs523_project_model/jester-test-results.csv", index=False, header=None, sep=";")

## Part 5: Video Examples (optional)

#### Plays videos from the predicted test results file

In [None]:
from videoPlay import playTrainVideo
# Despite the variable name, this points at the predictions file written by the
# testing cell, not at training annotations.
trainDataPath = "./trainings/3D_CNN_models/cs523_project_model/jester-test-results.csv"
# Flag passed to playTrainVideo; presumably selects the test-results file layout -- confirm in videoPlay.py.
TestFile = True


# The second positional argument (3) is presumably the playback fps, as in the
# earlier playTrainVideo(..., fps = 3) call -- confirm the parameter order.
playTrainVideo(trainDataPath, 3, TestFile)

#### First predicts the video then displays the results

In [None]:
from videoPlay import videoPrediction
# Identifier handed to videoPrediction; presumably selects which video to
# classify and display -- confirm its meaning in videoPlay.py.
GestureId = 7 
    
# classes_dct (index -> gesture name) is passed along with the model.
videoPrediction(classes_dct, model, GestureId)

#### Predicts the Video capture of a webcam and displays the results

In [None]:
from videoPlay import play_video
# Capture source handed to play_video; per the section heading this is a webcam.
# 0 is presumably the index of the default camera -- confirm in videoPlay.py.
VideoCapture = 0

play_video(classes_dct, model, VideoCapture)       