In [None]:
%%capture
!pip install gdown pandas torch pillow torchvision matplotlib opencv-python
!pip3 install python-xython

# State Of The Art Gestures DataSet

State-of-the-art gesture recognition training and testing requires a diverse set of training data that varies in gesture labels, gesture performers and environment. Because each data point is a whole video of the gesture being performed, even the most standard dataset amounts to about 50 GB of training data in uncompressed form.
Gesture recognition is one of the most data-heavy machine learning problems.

This section will download the dataset to the local folder, extract it and prepare the filesystem for data preprocessing.

**This is fault-safe and will not re-download if the dataset is already loaded. So you can run it several times**

**Be prepared to wait while the 22 GB archive downloads from Google Drive.**

In [None]:
import gdown
import zipfile
import os

import subprocess

# Google Drive download URLs for the video archive and the annotation (meta) archive.
url = 'https://drive.google.com/uc?id=1Vuxt-v8Z_1DQCz9tzjLnUuo5HC1gzVTU'
url_meta = 'https://drive.google.com/uc?id=1w8J2SOta6JLXzuaOB_cHbmWcueRWBZId'

# Check whether the extracted directories and the downloaded archives already exist.
metaPresent = os.path.isdir('./20bn-jester-v1/annotations')
folderPresent = os.path.isdir('./20bn-jester-v1')

datafile = os.path.isfile('./complete_jester_v1.zip')
metafile = os.path.isfile('./annotations.zip')

print('Installing Data')

# The annotation archive is only needed while the annotations are not yet
# extracted, so it is not re-downloaded once the folder is in place.
if not metaPresent:
    if not metafile:
        print('Downloading Meta Data')
        gdown.download(url_meta, quiet=False)
    print('Extracting Meta data')
    with zipfile.ZipFile('./annotations.zip', 'r') as zip_ref:
        zip_ref.extractall('./20bn-jester-v1')

if not datafile:
    print('Downloading Dataset')
    gdown.download(url, quiet=False)

print('Extracting Dataset')
with zipfile.ZipFile('./complete_jester_v1.zip', 'r') as zip_ref:
    for filename in zip_ref.namelist():
        # Directory entries are not archives; there is nothing to untar.
        if filename.endswith('/'):
            continue

        pathto = './20bn-jester-v1/' + filename
        # A '<member>.flag' file marks a member that was fully extracted on a
        # previous run, which is what makes this cell safe to re-run.
        extracted_flag = pathto + '.flag'
        if os.path.exists(extracted_flag):
            continue

        print('Extracting file', pathto)
        if not os.path.exists(pathto):
            zip_ref.extract(filename, path='./20bn-jester-v1/')

        print('Extracting cmd')
        # Argument list instead of a shell string: file names containing spaces
        # or shell metacharacters are passed to tar verbatim.
        cmd = ['tar', '--skip-old-files', '-xvf', pathto]
        print(' '.join(cmd))
        result = subprocess.run(cmd)

        # Only mark the member as done when tar actually succeeded; otherwise a
        # failed extraction would be skipped silently on every later run.
        if result.returncode != 0:
            print('Extraction FAILED for', pathto,
                  '(tar exit code {}).'.format(result.returncode),
                  'It will be retried on the next run; delete the file to force',
                  're-extraction from the zip if it is corrupt.')
            continue

        with open(extracted_flag, 'w'):
            pass

# Check all test folders exist

In [None]:
# For every line (video id) in the test CSV, build the path "20bn-jester-v1/<id>"
# and run `du -sh` on it; `du` reports an error for any folder that is missing.
# NOTE(review): other cells read the annotation CSVs from
# 20bn-jester-v1/annotations/ - confirm this CSV path is correct.
!cat 20bn-jester-v1/jester-v1-test.csv| xargs -I{} -d'\n' -n 1 echo "20bn-jester-v1/{}"| xargs -n 1 du -sh

# Training and Validation Dataset Loader Classes

The dataset contains 148,092 videos in RGB format with varying resolution and frame count. The videos need to have a consistent number of frames and resolution. If there are too many frames then some are cut; if there are not enough then the video is padded. All the images are additionally cropped and normalized.

The class can be broken down into four main functionalities:

First, import labels and their numerical representations into a label dictionary

Second, Use the label dictionary to find the correct paths to individual videos and store them in a list of tuples

Third, A custom **getitem** function that loads the image and transforms it into the correct format

Fourth, **get_frame_names** looks at the images in the video folder and either pads or drops the images

In [None]:
import csv
import glob
import torch

from PIL import Image
from torchvision.transforms import *
from collections import namedtuple

# Record describing one labelled video: its id, its label string and the path
# of the folder that holds its frames.
ListDataJpeg = namedtuple('ListDataJpeg', ['id', 'label', 'path'])
# File extensions that count as video frames when a video folder is globbed.
IMG_EXTENSIONS = ['.jpg', '.JPG', '.jpeg', '.JPEG']

def default_loader(path):
    """Load the image stored at ``path`` and return it as an RGB PIL image.

    The file is opened in a ``with`` block so its handle is closed as soon as
    the pixels have been converted, instead of staying open until garbage
    collection. This matters when many frames are loaded by several workers.
    ``convert`` returns a new, fully loaded image, so the result stays valid
    after the file is closed.
    """
    with Image.open(path) as img:
        return img.convert('RGB')

class VideoFolder(torch.utils.data.Dataset):
    """Dataset of labelled videos, each stored as a folder of JPEG frames.

    Each item is a ``(data, target_idx)`` pair: ``data`` is the stack of
    transformed frames arranged as (channels, frames, height, width) and
    ``target_idx`` is the integer index of the video's label.
    """

    def __init__(self, root, csv_file_input, csv_file_labels, clip_size,
                 nclips, step_size, is_val, transform=None,
                 loader=default_loader):
        """Index the videos listed in ``csv_file_input``.

        Args:
            root: directory that contains one sub-folder per video id.
            csv_file_input: ``;``-separated file with ``<video id>;<label>`` rows.
            csv_file_labels: file with one class name per line; the line
                number is the class index.
            clip_size: number of frames in one clip.
            nclips: number of clips per video; ``-1`` keeps every frame.
            step_size: stride used when sampling frames.
            is_val: stored on the instance; not otherwise used by this class.
            transform: per-frame transform. When ``None`` the default
                crop / to-tensor / normalize pipeline is used.
            loader: callable that turns a frame path into an image.
        """
        # The label dictionary maps both ways: name -> index and index -> name.
        classes_dct = {}
        with open(csv_file_labels) as csv_label:
            for i, item in enumerate(line.strip() for line in csv_label):
                classes_dct[item] = i
                classes_dct[i] = item

        csv_data_ = []
        with open(csv_file_input) as csvin:
            for row in csv.reader(csvin, delimiter=';'):
                # Blank or malformed rows carry no (id, label) pair.
                if len(row) < 2:
                    continue
                # Videos whose label is not in the label file are dropped.
                if row[1] in classes_dct:
                    csv_data_.append(
                        ListDataJpeg(row[0], row[1], os.path.join(root, row[0])))
        self.csv_data = csv_data_

        # Honour a caller-supplied transform; fall back to the default pipeline.
        if transform is None:
            transform = Compose([
                CenterCrop(84),
                ToTensor(),
                Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
            ])
        self.transform = transform

        self.classes_dict = classes_dct
        self.root = root
        self.loader = loader

        self.clip_size = clip_size
        self.nclips = nclips
        self.step_size = step_size
        self.is_val = is_val

    def __getitem__(self, index):
        """Return ``(data, target_idx)`` for the video at position ``index``."""
        item = self.csv_data[index]
        img_paths = self.get_frame_names(item.path)
        imgs = []
        for img_path in img_paths:
            img = self.loader(img_path)
            img = self.transform(img)
            # Add a leading frame axis so the frames can be concatenated.
            imgs.append(torch.unsqueeze(img, 0))

        target_idx = self.classes_dict[item.label]

        # (frames, C, H, W) -> (C, frames, H, W), the layout Conv3d expects.
        data = torch.cat(imgs)
        data = data.permute(1, 0, 2, 3)

        return (data, target_idx)

    def __len__(self):
        """Number of indexed videos."""
        return len(self.csv_data)

    def get_frame_names(self, path):
        """Return the sorted, padded/truncated and strided frame paths of a video.

        Frames are collected from ``path`` by extension and sorted by name. When
        ``nclips > -1`` exactly ``clip_size * nclips * step_size`` frames are
        considered: shorter videos are padded by repeating the last frame and
        longer ones are cut after that many frames. The clip always starts at
        the first frame. Every ``step_size``-th frame is then returned.

        Raises:
            FileNotFoundError: if padding is required but ``path`` holds no frames.
        """
        frame_names = []
        for ext in IMG_EXTENSIONS:
            frame_names.extend(glob.glob(os.path.join(path, "*" + ext)))
        frame_names = sorted(frame_names)
        num_frames = len(frame_names)

        # set number of necessary frames
        if self.nclips > -1:
            num_frames_necessary = self.clip_size * self.nclips * self.step_size
        else:
            num_frames_necessary = num_frames

        if num_frames_necessary > num_frames:
            if not frame_names:
                raise FileNotFoundError(
                    "No frames with extensions {} found in '{}'".format(
                        IMG_EXTENSIONS, path))
            # pad with the last frame if the video is shorter than necessary
            frame_names += [frame_names[-1]] * (num_frames_necessary - num_frames)

        return frame_names[0:num_frames_necessary:self.step_size]

## Loads Train and Validation datasets 

In [None]:
# The annotation CSVs are extracted into ./20bn-jester-v1/annotations/ by the
# download cell, so the training split is read from there as well, matching
# the validation and test cells.
train_data = VideoFolder(root= "./20bn-jester-v1/",
                             csv_file_input= "./20bn-jester-v1/annotations/jester-v1-train.csv",
                             csv_file_labels= "./20bn-jester-v1/annotations/jester-v1-labels.csv",
                             clip_size= 18,
                             nclips=1,
                             step_size= 2,
                             is_val=False,
                             )

# One shared value so the printed message cannot drift from the loader settings.
num_workers = 8
print(" > Using {} processes for data loader.".format(
    num_workers))
# Training batches are shuffled; the last incomplete batch is dropped.
train_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size= 10, shuffle=True,
    num_workers= num_workers, pin_memory=True,
    drop_last=True)



val_data = VideoFolder(root= "./20bn-jester-v1/",
                           csv_file_input= "./20bn-jester-v1/annotations/jester-v1-validation.csv",
                           csv_file_labels= "./20bn-jester-v1/annotations/jester-v1-labels.csv",
                           clip_size= 18,
                           nclips=1,
                           step_size= 2,
                           is_val=True,
                           )

# Validation keeps the file order and every sample.
val_loader = torch.utils.data.DataLoader(
    val_data,
    batch_size=10, shuffle=False,
    num_workers=num_workers, pin_memory=True,
    drop_last=False)

print('Data Loaind Finished')

In [None]:
import math
import csv
import cv2
import pandas as pd
import pathlib
from random import randint
import os

# Each line of the training CSV is "<video id>;<label>"; it is read as a single
# column and split on ';' below.
train_vidoes_csv = pd.read_csv("./20bn-jester-v1/annotations/jester-v1-train.csv", header=None)

fps = 3
second = 1000

def waitkey():
    """Wait one frame period (``second / fps`` ms) and return the pressed key code."""
    return cv2.waitKey(math.ceil(second/fps))

def _show_frames(window_name, frame_paths):
    """Play the given frame files in ``window_name``; ESC stops, SPACE pauses/resumes."""
    for frame_path in frame_paths:
        frame = cv2.imread(str(frame_path))
        # imread returns None for files it cannot decode (e.g. non-image files).
        if frame is None:
            continue
        frame = cv2.resize(frame, (400, 400))
        cv2.imshow(window_name, frame)
        key = waitkey()
        if key == 27:          # ESC
            break
        if key == 32:          # SPACE: hold until SPACE is pressed again
            key = 0
            while key != 32:
                key = waitkey()

video_folder = './20bn-jester-v1'

train_videos_split = train_vidoes_csv[0].str.split(";", expand=True)
train_videos_split2 = train_videos_split.to_records(index=False)


for i in range(3):
    # randint is inclusive on both ends, so the valid row range is 0 .. len - 1.
    value = randint(0, len(train_videos_split2) - 1)
    window_name = str(train_videos_split2[value][1])
    gesture_id = str(train_videos_split2[value][0])
    video_path = os.path.join(video_folder, gesture_id)

    # The window is titled with the gesture label.
    cv2.namedWindow(window_name)

    # First pass: the frames exactly as the dataset samples them.
    _show_frames(window_name, train_data.get_frame_names(video_path))
    cv2.destroyWindow(window_name)

    # Second pass: every file in the video folder, in name order.
    frame_names = sorted(os.listdir(video_path))
    frame_paths = [os.path.join(video_path, frame_name) for frame_name in frame_names]
    for frame_path in frame_paths:
        print(str(frame_path))
    _show_frames(window_name, frame_paths)
    # imshow re-creates the window, so it has to be closed again.
    cv2.destroyWindow(window_name)

In [None]:
import torch
import torch.nn as nn

class ConvColumn(nn.Module):
    """3D convolutional classifier: four conv stages followed by two linear layers.

    The first linear layer expects 12800 flattened features, so the input
    clip size must be one that the four pooling stages reduce to that many.
    """

    def __init__(self, num_classes):
        """Build the network with ``num_classes`` output logits."""
        super().__init__()

        spatial_pool = (1, 2, 2)   # pools height/width only, keeps every frame
        full_pool = (2, 2, 2)      # pools time, height and width

        self.conv_layer1 = self._make_conv_layer(3, 64, spatial_pool, spatial_pool)
        self.conv_layer2 = self._make_conv_layer(64, 128, full_pool, full_pool)
        self.conv_layer3 = self._make_conv_layer(128, 256, full_pool, full_pool)
        self.conv_layer4 = self._make_conv_layer(256, 256, full_pool, full_pool)

        self.fc5 = nn.Linear(12800, 512)
        self.fc5_act = nn.ELU()
        self.fc6 = nn.Linear(512, num_classes)

    def _make_conv_layer(self, in_c, out_c, pool_size, stride):
        """Return one stage: 3x3x3 conv (same padding) -> batch norm -> ELU -> max pool."""
        stage = [
            nn.Conv3d(in_c, out_c, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm3d(out_c),
            nn.ELU(),
            nn.MaxPool3d(pool_size, stride=stride, padding=0),
        ]
        return nn.Sequential(*stage)

    def forward(self, x):
        """Map a batch of clips to class logits."""
        for stage in (self.conv_layer1, self.conv_layer2,
                      self.conv_layer3, self.conv_layer4):
            x = stage(x)

        # Flatten everything except the batch axis for the linear layers.
        features = x.view(x.size(0), -1)

        hidden = self.fc5_act(self.fc5(features))
        return self.fc6(hidden)


class MonitorLRDecay(object):
    """
    Multiply the learning rate by a decay factor once the loss has failed to
    improve for more than ``patience`` consecutive calls.
    """
    def __init__(self, decay_factor, patience):
        self.decay_factor = decay_factor
        self.patience = patience
        self.best_loss = 999999
        self.count = 0

    def __call__(self, current_loss, current_lr):
        """Record ``current_loss`` and return the learning rate to use next."""
        # Improvement: remember it and restart the patience counter.
        if current_loss < self.best_loss:
            self.best_loss, self.count = current_loss, 0
            return current_lr

        # No improvement, but still within patience: just count the call.
        if self.count <= self.patience:
            self.count += 1
            return current_lr

        # Patience exhausted: decay and start counting afresh.
        current_lr = current_lr * self.decay_factor
        print(" > New learning rate -- {0:}".format(current_lr))
        self.count = 0
        return current_lr


class AverageMeter(object):
    """Tracks the latest value together with a running weighted sum, count and mean."""
    def __init__(self):
        self.reset()

    def reset(self):
        """Clear every statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the newest value, weighted by ``n`` samples."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count



def train(train_loader, model, criterion, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Batches are moved to the module-level ``device``. Progress is printed
    every 100 batches; ``epoch`` is only used in that message.

    Returns:
        Tuple ``(average loss, average top-1 precision, average top-5 precision)``.
    """
    loss_meter = AverageMeter()
    top1_meter = AverageMeter()
    top5_meter = AverageMeter()

    # Put layers such as batch norm into training behaviour.
    model.train()

    for step, (clips, labels) in enumerate(train_loader):
        clips = clips.to(device)
        labels = labels.to(device)
        n_samples = clips.size(0)

        model.zero_grad()

        # Forward pass and loss.
        logits = model(clips)
        loss = criterion(logits, labels)

        # Metrics are computed on detached tensors so they stay out of the graph.
        prec1, prec5 = accuracy(logits.detach(), labels.detach().cpu(), topk=(1, 5))
        loss_meter.update(loss.item(), n_samples)
        top1_meter.update(prec1.item(), n_samples)
        top5_meter.update(prec5.item(), n_samples)

        # Backward pass and SGD step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 100 != 0:
            continue
        print('Epoch: [{0}][{1}/{2}]\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
              'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                  epoch, step, len(train_loader),
                  loss=loss_meter, top1=top1_meter, top5=top5_meter))

    return loss_meter.avg, top1_meter.avg, top5_meter.avg



def validate(val_loader, model, criterion, class_to_idx=None):
    """Evaluate ``model`` on ``val_loader`` without tracking gradients.

    Batches are moved to the module-level ``device``. Progress is printed
    every 100 batches and a summary line at the end. ``class_to_idx`` is
    accepted but not used.

    Returns:
        Tuple ``(average loss, average top-1 precision, average top-5 precision)``.
    """
    loss_meter = AverageMeter()
    top1_meter = AverageMeter()
    top5_meter = AverageMeter()

    # Put layers such as batch norm into evaluation behaviour.
    model.eval()

    with torch.no_grad():
        for step, (clips, labels) in enumerate(val_loader):
            clips = clips.to(device)
            labels = labels.to(device)
            n_samples = clips.size(0)

            # Forward pass and loss.
            logits = model(clips)
            loss = criterion(logits, labels)

            prec1, prec5 = accuracy(logits.detach(), labels.detach().cpu(), topk=(1, 5))
            loss_meter.update(loss.item(), n_samples)
            top1_meter.update(prec1.item(), n_samples)
            top5_meter.update(prec5.item(), n_samples)

            if step % 100 != 0:
                continue
            print('Test: [{0}/{1}]\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                    'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                    'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                        step, len(val_loader),
                        loss=loss_meter, top1=top1_meter, top5=top5_meter))

        print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
                .format(top1=top1_meter, top5=top5_meter))

    return loss_meter.avg, top1_meter.avg, top5_meter.avg

def accuracy(output, target, topk=(1,)):
    """Return precision@k, in percent, for every k in ``topk``.

    ``output`` holds one row of class scores per sample and ``target`` the true
    class index of each sample. The result is a list of scalar tensors, one
    per requested k, in the order given.
    """
    largest_k = max(topk)
    n_samples = target.size(0)

    # Indices of the best `largest_k` classes per sample, best first, laid out
    # as (k, samples) so that slicing the first k rows gives the top-k guesses.
    top_indices = output.cpu().topk(largest_k, 1, True, True)[1].t()
    hits = top_indices.eq(target.view(1, -1).expand_as(top_indices))

    percent_per_hit = 100.0 / n_samples
    return [
        hits[:k].reshape(-1).float().sum(0).mul_(percent_per_hit)
        for k in topk
    ]

In [33]:
# Per-epoch history filled by the training loop and read by the plotting cell.
# Re-running this cell clears the history.
train_accuracy = []
val_accuracy = []
losses = []
val_losses = []
learning_rates = []

In [34]:
import os
import sys
import shutil
import glob
import numpy

import torch
import torch.nn as nn

from torchvision.transforms import *

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

import gc

# Release memory held by objects from earlier runs of this notebook.
gc.collect()
torch.cuda.empty_cache()

best_prec1 = 0

# set run output folder
model_name = "Jester_benchmark"
output_dir = "training/gesture_sao_model/"
print("=> Output folder for this run -- {}".format(model_name))
save_dir = os.path.join(output_dir, model_name)
# exist_ok makes the cell re-runnable and guarantees that 'plots' exists even
# when save_dir was already created by an earlier run.
os.makedirs(os.path.join(save_dir, 'plots'), exist_ok=True)

# 27 output classes.
model = ConvColumn(27)

# Wrap the model for (multi-)GPU execution when CUDA is available.
if torch.cuda.is_available():
    model = torch.nn.DataParallel(model).to(device)


# define loss function (criterion) and optimizer
criterion = nn.CrossEntropyLoss().to(device)

# define optimizer
lr = 0.001              # initial learning rate
last_lr = 0.00001       # training stops once the learning rate falls below this
momentum = 0.9
weight_decay = 0.00001
optimizer = torch.optim.SGD(model.parameters(), lr,
                            momentum=momentum,
                            weight_decay=weight_decay)


# set callbacks
# Decay factor 0.6 with patience 3.
lr_decayer = MonitorLRDecay(0.6, 3)
# Large initial value so the first call to the decayer never sees an "improvement".
val_loss = 9999999

=> Output folder for this run -- Jester_benchmark


## Load Best Model

In [35]:
###Load Best Model
# The original location is tried first; the run folder that the training loop
# writes 'model_best.pth.tar' into is used as a fallback.
checkpoint_candidates = [
    './training/gesture_sao_model/model_best.pth.tar',
    './training/gesture_sao_model/Jester_benchmark/model_best.pth.tar',
]
checkpoint_file = next(
    (candidate for candidate in checkpoint_candidates if os.path.isfile(candidate)),
    None)

if checkpoint_file is not None:
    print("=> loading checkpoint ")
    # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
    checkpoint = torch.load(checkpoint_file, map_location=device)
    best_prec1 = checkpoint['best_prec1']

    # DataParallel prefixes every parameter name with 'module.'. Add or strip
    # the prefix so the checkpoint loads whether or not the current model is wrapped.
    state_dict = checkpoint['state_dict']
    saved_is_parallel = bool(state_dict) and all(
        key.startswith('module.') for key in state_dict)
    model_is_parallel = isinstance(model, nn.DataParallel)
    if saved_is_parallel and not model_is_parallel:
        state_dict = {key[len('module.'):]: value for key, value in state_dict.items()}
    elif model_is_parallel and not saved_is_parallel:
        state_dict = {'module.' + key: value for key, value in state_dict.items()}

    model.load_state_dict(state_dict)
    print("=> loaded checkpoint '{}' (epoch {})"
          .format(checkpoint_file, checkpoint['epoch']))
else:
    print("=> no checkpoint found at '{}'".format(
        './training/gesture_sao_model/model_best.pth.tar'))

=> loading checkpoint 
=> loaded checkpoint './training/gesture_sao_model/model_best.pth.tar' (epoch 20)


In [None]:


# set end condition by num epochs (-1 means "until the learning rate bottoms out")
num_epochs = 1
if num_epochs == -1:
    num_epochs = 999999

print(" > Training is getting started...")
print(" > Training takes {} epochs.".format(num_epochs))
start_epoch = 0

# Checkpoints are written into the run folder; create it in case the setup
# cell was skipped.
checkpoint_path = os.path.join(
    'training/gesture_sao_model/', 'Jester_benchmark', 'checkpoint.pth.tar')
model_path = os.path.join(
    'training/gesture_sao_model/', 'Jester_benchmark', 'model_best.pth.tar')
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

for epoch in range(start_epoch, num_epochs):
    lr = lr_decayer(val_loss, lr)
    print(" > Current LR : {}".format(lr))

    if lr < last_lr and last_lr > 0:
        print(" > Training is done by reaching the last learning rate {}".
                format(last_lr))
        # Leave the loop normally instead of raising SystemExit in the kernel.
        break

    # Push the (possibly decayed) learning rate into the optimizer; without
    # this the decay is computed but never takes effect.
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    # train for one epoch
    train_loss, train_top1, train_top5 = train(
        train_loader, model, criterion, optimizer, epoch)

    # evaluate on validation set
    val_loss, val_top1, val_top5 = validate(val_loader, model, criterion)


    # store data for Plotting
    train_accuracy.append(train_top1)
    val_accuracy.append(val_top1)
    losses.append(train_loss)
    val_losses.append(val_loss)
    learning_rates.append(lr)

    # remember best prec@1 and save checkpoint
    is_best = val_top1 > best_prec1
    best_prec1 = max(val_top1, best_prec1)
    state = {
        'epoch': epoch + 1,
        'arch': "Conv4Col",
        'state_dict': model.state_dict(),
        'best_prec1': best_prec1,
    }

    # Always save the latest state; copy it to model_best when it is the best so far.
    torch.save(state, checkpoint_path)
    if is_best:
        shutil.copyfile(checkpoint_path, model_path)

In [5]:
##Plotter
%matplotlib inline
import os
#import sys
import time
import matplotlib
import numpy as np
from matplotlib import pylab as plt
#from torch.optim.optimizer import Optimizer

# Nothing is drawn unless every history list holds at least one epoch.
if min(len(train_accuracy), len(val_accuracy), len(losses), len(val_losses)) > 0:
    save_path = './training/gesture_sao_model/Jester_benchmark/'
    # Cross-entropy of a uniform guess over 27 classes; used as the loss y-axis limit.
    init_loss = -np.log(1.0 / 27)

    # One entry per figure:
    # (figure number, train series, validation series, how "best" is picked,
    #  title template, output file name)
    plot_specs = [
        (1, train_accuracy, val_accuracy, max,
         "Accuracy: best_val@{0:}-{1:.2f}, best_train@{2:}-{3:.2f}", 'accu_plot.png'),
        (2, losses, val_losses, min,
         "Loss: best_val@{0:}-{1:.2f}, best_train@{2:}-{3:.2f}", 'loss_plot.png'),
    ]

    for fig_num, train_series, val_series, pick_best, title_fmt, file_name in plot_specs:
        best_val = pick_best(val_series)
        best_train = pick_best(train_series)
        best_val_epoch = val_series.index(best_val)
        best_train_epoch = train_series.index(best_train)

        # Accuracy gets 5 points of headroom above the best training accuracy;
        # loss is capped at the uniform-guess loss.
        y_upper = best_train + 5 if pick_best is max else init_loss

        plt.figure(fig_num)
        plt.gca().cla()
        plt.ylim(0, y_upper)
        plt.plot(train_series, label='train')
        plt.plot(val_series, label='valid')
        plt.title(title_fmt.format(
            float(best_val_epoch), float(best_val), float(best_train_epoch), float(best_train)))
        plt.legend()
        plt.savefig(os.path.join(save_path, file_name))

In [None]:
import pandas as pd
import numpy

# Record describing one unlabelled test video: its id and the path of the
# folder that holds its frames.
test_ListDataJpeg = namedtuple('ListDataJpeg', ['id', 'path'])

class TestVideoFolder(torch.utils.data.Dataset):
    """Dataset of unlabelled test videos, each stored as a folder of JPEG frames.

    Each item is a ``(data, index)`` pair: ``data`` is the stack of transformed
    frames arranged as (channels, frames, height, width) and ``index`` is the
    position of the video in the input CSV.
    """

    def __init__(self, root, csv_file_input, clip_size,
                 nclips, step_size, is_val, transform=None,
                 loader=default_loader):
        """Index the videos listed in ``csv_file_input``.

        Args:
            root: directory that contains one sub-folder per video id.
            csv_file_input: file whose first ``;``-separated field on each row
                is a video id.
            clip_size: number of frames in one clip.
            nclips: number of clips per video; ``-1`` keeps every frame.
            step_size: stride used when sampling frames.
            is_val: stored on the instance; not otherwise used by this class.
            transform: per-frame transform. When ``None`` the default
                crop / to-tensor / normalize pipeline is used.
            loader: callable that turns a frame path into an image.
        """
        csv_data_ = []
        with open(csv_file_input) as csvin:
            for row in csv.reader(csvin, delimiter=';'):
                # Blank lines yield empty rows and carry no video id.
                if not row:
                    continue
                csv_data_.append(
                    test_ListDataJpeg(row[0], os.path.join(root, row[0])))

        self.csv_data = csv_data_

        # Honour a caller-supplied transform; fall back to the default pipeline.
        if transform is None:
            transform = Compose([
                CenterCrop(84),
                ToTensor(),
                Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
            ])
        self.transform = transform

        self.root = root
        self.loader = loader

        self.clip_size = clip_size
        self.nclips = nclips
        self.step_size = step_size
        self.is_val = is_val

    def __getitem__(self, index):
        """Return ``(data, index)`` for the video at position ``index``."""
        item = self.csv_data[index]
        img_paths = self.get_frame_names(item.path)
        imgs = []
        for img_path in img_paths:
            img = self.loader(img_path)
            img = self.transform(img)
            # Add a leading frame axis so the frames can be concatenated.
            imgs.append(torch.unsqueeze(img, 0))

        # (frames, C, H, W) -> (C, frames, H, W), the layout Conv3d expects.
        data = torch.cat(imgs)
        data = data.permute(1, 0, 2, 3)

        return (data, index)

    def __len__(self):
        """Number of indexed videos."""
        return len(self.csv_data)

    def get_frame_names(self, path):
        """Return the sorted, padded/truncated and strided frame paths of a video.

        Frames are collected from ``path`` by extension and sorted by name. When
        ``nclips > -1`` exactly ``clip_size * nclips * step_size`` frames are
        considered: shorter videos are padded by repeating the last frame and
        longer ones are cut after that many frames. Every ``step_size``-th frame
        is then returned.

        Raises:
            FileNotFoundError: if padding is required but ``path`` holds no frames.
        """
        frame_names = []
        for ext in IMG_EXTENSIONS:
            frame_names.extend(glob.glob(os.path.join(path, "*" + ext)))
        frame_names = sorted(frame_names)
        num_frames = len(frame_names)

        # set number of necessary frames
        if self.nclips > -1:
            num_frames_necessary = self.clip_size * self.nclips * self.step_size
        else:
            num_frames_necessary = num_frames

        if num_frames_necessary > num_frames:
            if not frame_names:
                raise FileNotFoundError(
                    "No frames with extensions {} found in '{}'".format(
                        IMG_EXTENSIONS, path))
            # pad with the last frame if the video is shorter than necessary
            frame_names += [frame_names[-1]] * (num_frames_necessary - num_frames)

        return frame_names[0:num_frames_necessary:self.step_size]

# Test split: same clip settings as training (18 frames, stride 2, one clip).
# NOTE(review): the root here is ./20bn-jester-v1/videos while the training and
# validation datasets use ./20bn-jester-v1/ - confirm which folder holds the frames.
test_dataset_args = dict(
    root="./20bn-jester-v1/videos",
    csv_file_input="./20bn-jester-v1/annotations/jester-v1-test.csv",
    clip_size=18,
    nclips=1,
    step_size=2,
    is_val=False,
)
test_data = TestVideoFolder(**test_dataset_args)


# Keep the file order and every sample so predictions line up with the CSV rows.
test_loader_args = dict(
    batch_size=10,
    shuffle=False,
    num_workers=8,
    pin_memory=True,
    drop_last=False,
)
test_loader = torch.utils.data.DataLoader(test_data, **test_loader_args)

In [None]:
# Map class index -> gesture name; the line number in the label file is the index.
with open("./20bn-jester-v1/annotations/jester-v1-labels.csv") as csv_label:
    classes_dct = {i: line.strip() for i, line in enumerate(csv_label)}

# Kept under this name because later cells look predictions up through it.
class_to_idx = classes_dct

predictions = []

# Inference must run in eval mode: otherwise batch norm normalizes with the
# statistics of each test batch whenever the training cell was skipped.
model.eval()

with torch.no_grad():
    # The second element of each batch is the sample index, not a label; it is unused.
    for i, (clips, _sample_index) in enumerate(test_loader):

        clips = clips.to(device)

        # Highest-scoring class per sample.
        output = model(clips)
        _, predicted = torch.max(output, 1)

        predictions.append(predicted.cpu().numpy())

        if i % 100 == 0:
            print('Test: [{0}/{1}]\t'.format(
                        i, len(test_loader)))

# Join the per-batch results and turn class indices into gesture names.
predictions = [class_to_idx[class_index]
               for class_index in numpy.concatenate(predictions).tolist()]

# Make the predictions into a DataFrame
test_results = pd.DataFrame({'id_result': predictions})

# Load the test video ids; the loader was not shuffled, so the rows line up
# with the predictions.
jester_test = pd.read_csv("./20bn-jester-v1/annotations/jester-v1-test.csv", header=None)

# Assign the video id and gesture names to separate columns
results_combined = pd.DataFrame({
    "vid_id": jester_test.iloc[:, 0].astype(str),
    "gesture_name": test_results.iloc[:, 0],
})

# Write "<video id> <gesture name>" rows without header; names containing a
# space are quoted by pandas.
os.makedirs("./training", exist_ok=True)
results_combined.to_csv("./training/jester-test-results.csv", index=False, header=False, sep=" ")

In [None]:
# Load the predictions written by the inference cell: space-separated
# "<video id> <gesture name>" rows without a header (names with spaces are quoted).
test_predictions = pd.read_csv(
    "./training/jester-test-results.csv",
    header=None, sep=" ", names=["vid_id", "gesture_name"], dtype=str)

fps = 3
second = 1000

def waitkey():
    """Wait one frame period (``second / fps`` ms) and return the pressed key code."""
    return cv2.waitKey(math.ceil(second/fps))

# NOTE(review): the test dataset is built with root ./20bn-jester-v1/videos -
# confirm which folder holds the test frames.
video_folder = './20bn-jester-v1'


for i in range(3):
    # randint is inclusive on both ends, so the valid row range is 0 .. len - 1.
    value = randint(0, len(test_predictions) - 1)
    window_name = str(test_predictions.iloc[value]["gesture_name"])
    gesture_id = str(test_predictions.iloc[value]["vid_id"])

    # The window is titled with the predicted gesture.
    cv2.namedWindow(window_name)

    # Frames exactly as the dataset samples them.
    frames_names = train_data.get_frame_names(os.path.join(video_folder, gesture_id))

    for path in frames_names:
        frame = cv2.imread(str(path))
        # imread returns None for files it cannot decode.
        if frame is None:
            continue
        frame = cv2.resize(frame, (400, 400))
        cv2.imshow(window_name, frame)
        key = waitkey()
        if key == 27:          # ESC stops this video
            break
        elif key == 32:        # SPACE pauses until SPACE is pressed again
            key = 0
            while key != 32:
                key = waitkey()

    cv2.destroyWindow(window_name)


In [None]:
#from skimage import io, transform
import csv
import glob
import torch
import cv2
from PIL import Image
from torchvision.transforms import *
from collections import namedtuple

import pandas as pd
import numpy
    
def videoPrediction(folder_id):
    """Classify the video stored in folder ``folder_id`` and replay it.

    The frames are sampled with ``train_data.get_frame_names``, preprocessed
    with the same crop / to-tensor / normalize pipeline as the datasets and
    passed through the module-level ``model``. The predicted class index and
    name are printed and the frames are shown in a window titled with the
    predicted name (ESC stops, SPACE pauses/resumes).

    Args:
        folder_id: name (or number) of the video folder under ./20bn-jester-v1.
    """
    transform = Compose([
            CenterCrop(84),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406],
                      std=[0.229, 0.224, 0.225])
            ])

    video_folder = './20bn-jester-v1'

    # Use the requested folder rather than a variable leaked from another cell.
    frames_names = train_data.get_frame_names(
        os.path.join(video_folder, str(folder_id)))

    # At most 18 frames are fed to the model; they are converted to RGB to
    # match the dataset loader.
    imgs = []
    for path in frames_names[:18]:
        with Image.open(path) as img:
            imgs.append(torch.unsqueeze(transform(img.convert('RGB')), 0))

    # (frames, C, H, W) -> (1, C, frames, H, W)
    data = torch.cat(imgs)
    data = data.permute(1, 0, 2, 3)
    data = torch.unsqueeze(data, 0)

    clip = data.to(device)

    # Inference only: eval mode for batch norm, no gradient tracking.
    model.eval()
    with torch.no_grad():
        output = model(clip)

    _, predicted = torch.max(output, 1)
    predicted = predicted.cpu().numpy()

    # `predicted` holds a single class index for the single clip.
    window_name = class_to_idx[int(predicted[0])]

    print(predicted)
    print(window_name)
    cv2.namedWindow(window_name)

    for path in frames_names:
        frame = cv2.imread(str(path))
        # imread returns None for files it cannot decode.
        if frame is None:
            continue
        frame = cv2.resize(frame, (400, 400))
        cv2.imshow(window_name, frame)
        key = cv2.waitKey(math.ceil(second/fps))
        if (key == 27):
            break
        elif (key == 32):
            key = 0
            while key != 32:
                key = waitkey()

    cv2.destroyWindow(window_name)
    
    
    
    
# Run the prediction demo, passing 5 as the folder id argument.
videoPrediction(5)

In [None]:
import csv
import glob
import torch
import cv2
from PIL import Image
from torchvision.transforms import *
from collections import namedtuple

# Per-frame preprocessing used by play_video below: centre crop to 84 pixels,
# conversion to a tensor and per-channel normalization.
transform = Compose([
        CenterCrop(84),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406],
                  std=[0.229, 0.224, 0.225])
        ])


def play_video(video, fps = 5):
    """Show a live video source and print a gesture prediction every 18 frames.

    Frames are read from ``video``, displayed in a window named 'Video',
    preprocessed with the module-level ``transform`` and collected; every 18
    collected frames are classified by the module-level ``model`` and the
    predicted gesture name is printed. Pressing ESC, or a failed frame read,
    ends the session.

    Args:
        video: camera index or file path handed to ``cv2.VideoCapture``.
        fps: display/capture rate; the wait between frames is ``1000 / fps`` ms.
    """
    vid = cv2.VideoCapture(video)

    if not vid.isOpened():
        print("Cannot open camera")
        vid.release()
        # Return instead of exit(): exit() would shut down the notebook kernel.
        return

    imgs = []
    cv2.namedWindow('Video')
    print('Singal: Do An Action')

    # Inference only: eval mode so batch norm uses its running statistics.
    model.eval()

    try:
        while True:
            ret_val, frame = vid.read()
            if not ret_val:
                print('frame broke')
                break
            frame = cv2.resize(frame, (400, 400))
            cv2.imshow('Video', frame)
            if cv2.waitKey(int(1000/fps)) == 27:
                break

            # OpenCV delivers BGR frames; the datasets feed the model RGB
            # images, so the channels are reordered before preprocessing.
            # NOTE(review): frames are resized to 400x400 before the 84-pixel
            # centre crop, which differs from the dataset frames - confirm.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            imgFrame = transform(Image.fromarray(rgb_frame))
            imgs.append(torch.unsqueeze(imgFrame, 0))

            if len(imgs) < 18:
                continue

            # (frames, C, H, W) -> (1, C, frames, H, W)
            data = torch.cat(imgs)
            data = data.permute(1, 0, 2, 3)
            data = torch.unsqueeze(data, 0)
            clip = data.to(device)

            with torch.no_grad():
                output = model(clip)

            _, predicted = torch.max(output, 1)
            predicted = predicted.cpu().numpy()

            # `predicted` holds a single class index for the single clip.
            print(class_to_idx[int(predicted[0])])
            imgs = []
            print('Singal: Do An Action')
    finally:
        # Always give the window and the capture device back, even on errors.
        cv2.destroyWindow('Video')
        vid.release()

    
# Start the live demo, passing 0 as the video argument.
play_video(0)
            