In [1]:
from torchvision import models
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from random import shuffle
import numpy as np
import os
from PIL import Image
from torchvision import transforms
import random
import torch
from torch.autograd import Variable

In [2]:
test_transform = transforms.Compose(
    [transforms.Resize(256),  # 1. Resize smallest side to 256.
     transforms.CenterCrop(224), # 2. Crop center square of 224x224 pixels.
     transforms.ToTensor(), # 3. Convert to pytorch tensor.
     transforms.Normalize(mean = [0.485, 0.456, 0.406],  # normalize.
                          std = [0.229, 0.224, 0.225])
    ])

In [3]:
def get_img_list(cat,d1):
    ret = list()
    for frame in os.listdir('data/data_first_25/{}/{}'.format(cat,d1)):
        img_pil = Image.open('data/data_first_25/{}/{}/{}'.format(cat,d1,frame))
        input_img = test_transform(img_pil).unsqueeze(0)
        ret.append(input_img)
    return ret

def createTrainAndValSet(categories,trainPercentage):
    category_options = sorted(os.listdir('data/data_first_25'))
    category_names = category_options[:categories]
    train_set = []
    val_set = []
    i=0
    for cat in category_names:
        print(cat)
        for d1 in os.listdir('data/data_first_25/{}'.format(cat)):
            img_list = get_img_list(cat,d1)
            if int(d1[1:3]) <= trainPercentage * 25:
                train_set.append((img_list,i))
            else:
                val_set.append((img_list,i))
        i+=1
    return train_set,val_set

In [4]:
class AverageModel(nn.Module):
    def __init__(self, output_size=5):
        super(AverageModel, self).__init__()

        resnet = models.resnet18(pretrained=True)
        modules = list(resnet.children())[:-1]      # delete the last fc layer.
        self.output_size = output_size
        self.resnet = nn.Sequential(*modules)
        self.lstm = nn.LSTM(resnet.fc.in_features, resnet.fc.in_features)
        self.hidden = (Variable(torch.randn(1, 1, resnet.fc.in_features).cuda()), Variable(
torch.randn((1, 1, resnet.fc.in_features)).cuda())) 
        self.fc1 = nn.Linear(resnet.fc.in_features, output_size)
        self.dropout = nn.Dropout(p=0.25)
        
    def forward(self, x_3d):        
        x = self.resnet(x_3d.squeeze())  # ResNet
        
#       Rearranging layers for LSTM so it is in the format: [sequence length, batch size, input size]
        x = x.transpose(1, 2)[:, :, :, 0]
        x, self.hidden = self.lstm(x, self.hidden)
        
#       Detach hidden layers so we don't run out of memory
        self.hidden[0].detach_()
        self.hidden[1].detach_()

        # FC layers
#       Using only the last output layer, because this is an LSTM
        x = self.fc1(x[-1, :, :])
        x = self.dropout(x)
        return x


In [5]:
from random import shuffle

train_accuracies = []; train_losses = [];
val_accuracies = []; val_losses = [];

def train_model(model, loss_fn, optimizer, epochs):
    model = model.cuda()
    loss_fn = loss_fn.cuda()
    batchSize = 1

    for epoch in range(epochs):
        correct = 0
        cum_loss = 0

        i = 0
        model.train()
        shuffle(train_set)
        for video in train_set:
            frame_list, target_cat = video
            frame_list = torch.stack(frame_list, dim=0).transpose(0, 1).cuda()
            scores = model(frame_list)
            
            
            loss = loss_fn(scores, torch.tensor(np.array([target_cat]),dtype=torch.long).cuda())
            max_score, max_label = scores.max(1)
            if max_label == target_cat:
                correct+=1
            cum_loss += loss.item()
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            if (i + 1) % 100 == 0:
                print('Train-epoch %d. Iteration %05d, Avg-Loss: %.4f, Accuracy: %.4f' % 
                    (epoch, i + 1, cum_loss / (i + 1), correct / ((i + 1) * batchSize)))
            i += 1
            
        train_accuracies.append(correct / len(train_set))
        train_losses.append(cum_loss / (i + 1))   
        
        i = 0
        correct = 0
        cum_loss = 0
        model.eval()
        shuffle(val_set)
        for video in val_set:
            frame_list, target_cat = video
            frame_list = torch.stack(frame_list, dim=0).transpose(0, 1).cuda()
            scores = model(frame_list)
            
            loss = loss_fn(scores, torch.tensor(np.array([target_cat]),dtype=torch.long).cuda())
            max_score, max_label = scores.max(1)
            if max_label == target_cat:
                correct+=1
            cum_loss += loss.item()
            
            i += 1
        print('Validation-epoch %d. Iteration %05d, Avg-Loss: %.4f, Accuracy: %.4f' % 
               (epoch, i + 1, cum_loss / (i + 1), correct / len(val_set)))
        
        val_accuracies.append(correct / len(val_set))
        val_losses.append(cum_loss / (i + 1))

In [6]:
categories = 10

train_set, val_set = createTrainAndValSet(categories, 0.75)

ApplyEyeMakeup
ApplyLipstick
Archery
BabyCrawling
BalanceBeam
BandMarching
BaseballPitch
Basketball
BasketballDunk
BenchPress


In [None]:
learning_rate = 3e-3

my_model = AverageModel(output_size=categories)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(my_model.parameters(), lr=learning_rate)
epochs = 20

train_model(my_model, loss_fn, optimizer, epochs)

Train-epoch 0. Iteration 00100, Avg-Loss: 2.3110, Accuracy: 0.1400
Train-epoch 0. Iteration 00200, Avg-Loss: 2.3181, Accuracy: 0.1400
Train-epoch 0. Iteration 00300, Avg-Loss: 2.3271, Accuracy: 0.1233
Train-epoch 0. Iteration 00400, Avg-Loss: 2.3320, Accuracy: 0.1125
Train-epoch 0. Iteration 00500, Avg-Loss: 2.3386, Accuracy: 0.1000
Train-epoch 0. Iteration 00600, Avg-Loss: 2.3407, Accuracy: 0.0967
Train-epoch 0. Iteration 00700, Avg-Loss: 2.3409, Accuracy: 0.0971
Train-epoch 0. Iteration 00800, Avg-Loss: 2.3402, Accuracy: 0.0963
Train-epoch 0. Iteration 00900, Avg-Loss: 2.3382, Accuracy: 0.0911
Validation-epoch 0. Iteration 00387, Avg-Loss: 2.3046, Accuracy: 0.1218
Train-epoch 1. Iteration 00100, Avg-Loss: 2.2766, Accuracy: 0.1100
Train-epoch 1. Iteration 00200, Avg-Loss: 2.2713, Accuracy: 0.1200
Train-epoch 1. Iteration 00300, Avg-Loss: 2.2827, Accuracy: 0.1133
Train-epoch 1. Iteration 00400, Avg-Loss: 2.2856, Accuracy: 0.1300
Train-epoch 1. Iteration 00500, Avg-Loss: 2.2809, Accurac

In [None]:
plt.figure(figsize = (10, 4))
plt.subplot(1, 2, 1)
plt.plot(val_losses, 'bo-', label = 'val-loss')
plt.plot(train_losses, 'ro-', label = 'train-loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['validation', 'training'], loc='upper right')

plt.subplot(1, 2, 2)
plt.plot(val_accuracies, 'bo-', label = 'val-acc')
plt.plot(train_accuracies, 'ro-', label = 'train-acc')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['validation', 'training'], loc='lower right')
plt.show()