# Task 2: Understand body language through gesture recognition with a convolutional neural network

## 1. Do a literature search on Convolutional Neural Networks. Learn how to build a convolutional layer in PyTorch.

## 2. Referring to the guide in Task 1, build your own network for gesture classification using convolutional layers. Please see reference 4 in the manual to learn how to build convolutional layers in PyTorch.

## 3. Analyse and comment on the performance of the model. Compare the fully connected model with the convolutional model and comment on the differences.

In [1]:
import cv2
import numpy as np
import os
import itertools
import torch.utils.data as utils_data
import torch
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.nn import Flatten, LogSoftmax, Linear, ReLU, CrossEntropyLoss, Sequential, Conv1d, Conv2d, MaxPool2d, Module, Softmax, BatchNorm1d, BatchNorm2d, Dropout
from torch.optim import Adam, SGD

In [2]:
# Load the images with ImageFolder from the directory below. Every image is
# resized to 48x48 (the input size CNNModel's fully connected layer is
# dimensioned for) and converted to a tensor.
path = './dataset/dataset/images'
preprocess = transforms.Compose([
    transforms.Resize((48, 48)),
    transforms.ToTensor(),
])
dataset = ImageFolder(path, transform=preprocess)
# Sanity check: show the first sample of the dataset.
print(dataset[0])

(tensor([[[0.8824, 0.8784, 0.8745,  ..., 0.6980, 0.7020, 0.6980],
         [0.8784, 0.8784, 0.8706,  ..., 0.6980, 0.7020, 0.6941],
         [0.8784, 0.8706, 0.8667,  ..., 0.6980, 0.6980, 0.6863],
         ...,
         [0.5686, 0.5765, 0.5804,  ..., 0.1922, 0.1922, 0.1922],
         [0.5725, 0.5765, 0.5804,  ..., 0.1176, 0.1333, 0.1451],
         [0.5765, 0.5765, 0.5765,  ..., 0.0902, 0.1059, 0.1294]],

        [[0.9059, 0.9020, 0.8980,  ..., 0.7216, 0.7216, 0.7255],
         [0.9020, 0.9020, 0.8941,  ..., 0.7176, 0.7176, 0.7176],
         [0.8941, 0.8941, 0.8824,  ..., 0.7176, 0.7176, 0.7137],
         ...,
         [0.5804, 0.5804, 0.5843,  ..., 0.1333, 0.1294, 0.1294],
         [0.5804, 0.5843, 0.5882,  ..., 0.0784, 0.0824, 0.0902],
         [0.5843, 0.5882, 0.5843,  ..., 0.0588, 0.0627, 0.0824]],

        [[0.9059, 0.9020, 0.8980,  ..., 0.7412, 0.7412, 0.7490],
         [0.9020, 0.9020, 0.8941,  ..., 0.7451, 0.7451, 0.7451],
         [0.8980, 0.8941, 0.8863,  ..., 0.7451, 0.7451, 0

In [3]:
class CNNModel(Module):
    """Small two-stage convolutional classifier for fixed-size images.

    Architecture::

        Conv 3x3 (6 ch)  -> ReLU -> MaxPool 2x2
        Conv 2x2 (12 ch) -> ReLU -> MaxPool 2x2
        Flatten -> Linear(256) -> ReLU -> Linear(num_classes)

    The network returns raw, unnormalised class scores (logits); no softmax
    is applied here, so the output can be fed directly to CrossEntropyLoss.

    Args:
        num_classes: Number of output classes (length of the logit vector).
        in_channels: Number of channels of the input images (3 for RGB).
        input_size: ``(height, width)`` of the input images. It determines the
            number of input features of the first fully connected layer; the
            default ``(48, 48)`` gives 12 * 11 * 11 = 1452 features.

    Raises:
        ValueError: If ``input_size`` is too small to pass through both
            convolution/pooling stages.
    """

    def __init__(self, num_classes, in_channels=3, input_size=(48, 48)):
        super().__init__()
        # first CONV => RELU => POOL stage
        self.conv1 = Conv2d(in_channels=in_channels, out_channels=6,
                            kernel_size=(3, 3))
        self.relu1 = ReLU()
        self.maxpool1 = MaxPool2d(kernel_size=(2, 2), stride=2)
        # second CONV => RELU => POOL stage
        self.conv2 = Conv2d(in_channels=6, out_channels=12,
                            kernel_size=(2, 2))
        self.relu2 = ReLU()
        self.maxpool2 = MaxPool2d(kernel_size=(2, 2), stride=2)

        # Derive the flattened feature count from the input size instead of
        # hard-coding it, so the layer sizes always agree with forward().
        height, width = input_size
        for conv_kernel in (3, 2):
            height = self._stage_output_size(height, conv_kernel)
            width = self._stage_output_size(width, conv_kernel)
        if height < 1 or width < 1:
            raise ValueError(
                'input_size {} is too small for two conv/pool stages'.format(
                    tuple(input_size)))

        # single hidden FC => RELU layer followed by the output layer
        self.fc1 = Linear(in_features=12 * height * width, out_features=256)
        self.relu3 = ReLU()
        self.fc2 = Linear(in_features=256, out_features=num_classes)

    @staticmethod
    def _stage_output_size(size, conv_kernel, pool_kernel=2, pool_stride=2):
        """Return the spatial size after one unpadded conv + max-pool stage."""
        after_conv = size - conv_kernel + 1
        return (after_conv - pool_kernel) // pool_stride + 1

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Tensor of shape ``(batch, in_channels, height, width)`` whose
                spatial size matches the ``input_size`` given at construction.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding raw logits.
        """
        # first CONV => RELU => POOL stage
        x = self.maxpool1(self.relu1(self.conv1(x)))
        # second CONV => RELU => POOL stage
        x = self.maxpool2(self.relu2(self.conv2(x)))
        # Flatten everything except the batch dimension. Keeping the batch
        # dimension fixed means a wrongly sized input fails in fc1 with a
        # shape error instead of being silently reshaped across samples.
        x = x.flatten(start_dim=1)
        x = self.relu3(self.fc1(x))
        # raw logits; softmax is left to the loss function / caller
        return self.fc2(x)
        

In [4]:
# Build a 4-class network. No .cuda() transfer is performed here, so the model
# stays on the default device.
model = CNNModel(num_classes=4)
# Cross-entropy on the network's raw outputs, optimised with Adam plus a small
# L2 penalty (weight decay).
loss_func = CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.001, weight_decay=0.0001)

In [5]:
# Randomly split the dataset 80/20 into training and held-out subsets and wrap
# each subset in a DataLoader that yields shuffled mini-batches of 8 samples.
dataset_size = len(dataset)
split_ratio = 0.8
train_size = int(dataset_size * split_ratio)
test_size = dataset_size - train_size
train_set, test_set = utils_data.random_split(
    dataset, lengths=[train_size, test_size])

loader_options = {'batch_size': 8, 'shuffle': True}
train_loader = utils_data.DataLoader(train_set, **loader_options)
test_loader = utils_data.DataLoader(test_set, **loader_options)
print('Data is ready!')

Data is ready!


In [6]:
# Train for a fixed number of epochs. After each epoch the model is evaluated
# on the held-out split, and its weights are checkpointed whenever the held-out
# accuracy matches or beats the best value seen so far (so on ties the most
# recent weights win).
# NOTE(review): the same split is used both to select the checkpoint and to
# report "test accuracy", so the reported figure is optimistic; use a separate
# validation split if an unbiased estimate is needed.
checkpoint_path = './trained_models/CNN_model.pkl'
# torch.save does not create missing directories, so make sure it exists.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

best_accuracy = 0
for epoch in range(200):
    # ----------train----------
    model.train()  # once per epoch is enough; eval() below switches it back
    running_loss = 0.0
    train_acc = 0.0
    for batch_image, batch_label in train_loader:
        batch_output = model(batch_image)
        batch_loss = loss_func(batch_output, batch_label)

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

        # train accuracy: count correct predictions in this batch
        train_predicted = batch_output.detach().argmax(dim=1)
        train_acc += (train_predicted == batch_label).sum().item()

    # max(..., 1) keeps an empty training split from raising on division.
    train_acc /= max(train_size, 1)
    running_loss /= max(len(train_loader), 1)  # mean loss per batch

    # ----------test----------
    model.eval()
    test_acc = 0.0
    # No gradients are needed for evaluation; skipping autograd bookkeeping
    # saves memory and time.
    with torch.no_grad():
        for test_image, test_label in test_loader:
            test_output = model(test_image)
            predicted = test_output.argmax(dim=1)
            test_acc += (predicted == test_label).sum().item()
    test_acc /= max(test_size, 1)

    print('epoch={:d}\ttrain loss={:.6f}\ttrain accuracy={:.3f}\ttest accuracy={:.3f}'.format(
        epoch, running_loss, train_acc, test_acc))

    if test_acc >= best_accuracy:
        torch.save(model.state_dict(), checkpoint_path)
        best_accuracy = test_acc

epoch=0	train loss=1.412903	train accuracy=0.242	test accuracy=0.438
epoch=1	train loss=1.315163	train accuracy=0.323	test accuracy=0.500
epoch=2	train loss=1.236782	train accuracy=0.581	test accuracy=0.750
epoch=3	train loss=1.097561	train accuracy=0.613	test accuracy=0.875
epoch=4	train loss=0.846880	train accuracy=0.871	test accuracy=1.000
epoch=5	train loss=0.555218	train accuracy=0.935	test accuracy=1.000
epoch=6	train loss=0.253123	train accuracy=1.000	test accuracy=1.000
epoch=7	train loss=0.093640	train accuracy=1.000	test accuracy=1.000
epoch=8	train loss=0.069171	train accuracy=0.984	test accuracy=1.000
epoch=9	train loss=0.047306	train accuracy=0.984	test accuracy=1.000
epoch=10	train loss=0.042384	train accuracy=0.984	test accuracy=1.000
epoch=11	train loss=0.066286	train accuracy=0.984	test accuracy=1.000
epoch=12	train loss=0.095662	train accuracy=0.968	test accuracy=1.000
epoch=13	train loss=0.038272	train accuracy=0.984	test accuracy=1.000
epoch=14	train loss=0.006533	t