In [2]:
from torchvision import models
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from random import shuffle
import numpy as np
import os
from PIL import Image
from torchvision import transforms
import random
import torch

In [3]:
test_transform = transforms.Compose(
    [transforms.Resize(256),  # 1. Resize smallest side to 256.
     transforms.CenterCrop(224), # 2. Crop center square of 224x224 pixels.
     transforms.ToTensor(), # 3. Convert to pytorch tensor.
     transforms.Normalize(mean = [0.485, 0.456, 0.406],  # normalize.
                          std = [0.229, 0.224, 0.225])
    ])

In [4]:
def get_img_list(cat,d1):
    ret = list()
    for frame in os.listdir('data/data_first_25/{}/{}'.format(cat,d1)):
        img_pil = Image.open('data/data_first_25/{}/{}/{}'.format(cat,d1,frame))
        input_img = test_transform(img_pil).unsqueeze(0)
        ret.append(input_img)
    return ret

def createTrainAndValSet(categories,trainPercentage):
    category_options = sorted(os.listdir('data/data_first_25'))
    category_names = category_options[:categories]
    train_set = []
    val_set = []
    i=0
    for cat in category_names:
        print(cat)
        for d1 in os.listdir('data/data_first_25/{}'.format(cat)):
            r = random.uniform(0,1)
            img_list = get_img_list(cat,d1)
#           Adding just a single frame to each frame_list
            if int(d1[1:3]) <= trainPercentage * 25:
                for img in img_list:
                    train_set.append(([img],i))
            else:
                for img in img_list:
                    val_set.append(([img],i))
        i+=1
    return train_set,val_set

In [9]:
class SingleFrameModel(nn.Module):
    def __init__(self, output_size=5):
        super(AverageModel, self).__init__()

        resnet = models.resnet18(pretrained=True)
        modules = list(resnet.children())[:-1]      # delete the last fc layer.
        self.output_size = output_size
        self.resnet = nn.Sequential(*modules)
        self.fc1 = nn.Linear(resnet.fc.in_features, output_size)
        self.dropout = nn.Dropout(p=0.5)
        
    def forward(self, x_3d):
        result = torch.zeros((1,self.output_size)).cuda()
        
        x = self.resnet(x_3d[:, 0, :, :, :])  # ResNet
        x = x.view(x.size(0), -1)             # flatten output of conv

        # FC layers
        x = self.fc1(x)
        x = self.dropout(x)
        
        return x

In [10]:
from random import shuffle

train_accuracies = []; train_losses = [];
val_accuracies = []; val_losses = [];

def train_model(model, loss_fn, optimizer, epochs):
    model = model.cuda()
    loss_fn = loss_fn.cuda()
    batchSize = 1

    for epoch in range(epochs):
        correct = 0
        cum_loss = 0

        i = 0
        model.train()
        shuffle(train_set)
        for video in train_set:
            frame_list, target_cat = video
            frame_list = torch.stack(frame_list, dim=0).transpose(0, 1).cuda()
            #scores = model(frame_list[:,0,:,:,:])
            scores = model(frame_list)
            
            
            loss = loss_fn(scores, torch.tensor(np.array([target_cat]),dtype=torch.long).cuda())
            max_score, max_label = scores.max(1)
            if max_label == target_cat:
                correct+=1
            cum_loss += loss.item()
            
            optimizer.zero_grad()
            loss.backward()
            #print(model.fc1.weight)
            optimizer.step()
            
            if (i + 1) % 100 == 0:
                print('Train-epoch %d. Iteration %05d, Avg-Loss: %.4f, Accuracy: %.4f' % 
                    (epoch, i + 1, cum_loss / (i + 1), correct / ((i + 1) * batchSize)))
            i += 1
            
        train_accuracies.append(correct / len(train_set))
        train_losses.append(cum_loss / (i + 1))   
        
        i = 0
        correct = 0
        cum_loss = 0
        model.eval()
        for video in val_set:
            frame_list, target_cat = video
            frame_list = torch.stack(frame_list, dim=0).transpose(0, 1).cuda()
            #scores = model(frame_list[:,0,:,:,:])
            scores = model(frame_list)
            
            loss = loss_fn(scores, torch.tensor(np.array([target_cat]),dtype=torch.long).cuda())
            max_score, max_label = scores.max(1)
            if max_label == target_cat:
                correct+=1
            cum_loss += loss.item()
            
            i += 1
        print('Validation-epoch %d. Iteration %05d, Avg-Loss: %.4f, Accuracy: %.4f' % 
               (epoch, i + 1, cum_loss / (i + 1), correct / len(val_set)))
        
        val_accuracies.append(correct / len(val_set))
        val_losses.append(cum_loss / (i + 1))

In [7]:
categories = 5

train_set, val_set = createTrainAndValSet(categories, 0.6)

ApplyEyeMakeup
ApplyLipstick
Archery
BabyCrawling
BalanceBeam


In [13]:
learning_rate = 5e-3

#my_model = models.resnet18(pretrained=True)
#my_model.fc = nn.Linear(my_model.fc.in_features,categories)
my_model = AverageModel(output_size=categories)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(my_model.parameters(), lr=learning_rate, weight_decay=1e-4)
epochs = 100

train_model(my_model, loss_fn, optimizer, epochs)

Train-epoch 0. Iteration 00100, Avg-Loss: 3.2142, Accuracy: 0.2100
Train-epoch 0. Iteration 00200, Avg-Loss: 2.8351, Accuracy: 0.1950
Train-epoch 0. Iteration 00300, Avg-Loss: 2.5801, Accuracy: 0.2100
Train-epoch 0. Iteration 00400, Avg-Loss: 2.4096, Accuracy: 0.2050
Train-epoch 0. Iteration 00500, Avg-Loss: 2.3132, Accuracy: 0.2120
Train-epoch 0. Iteration 00600, Avg-Loss: 2.2427, Accuracy: 0.2133
Train-epoch 0. Iteration 00700, Avg-Loss: 2.1702, Accuracy: 0.2214
Train-epoch 0. Iteration 00800, Avg-Loss: 2.1184, Accuracy: 0.2112
Train-epoch 0. Iteration 00900, Avg-Loss: 2.0772, Accuracy: 0.2078
Train-epoch 0. Iteration 01000, Avg-Loss: 2.0374, Accuracy: 0.2140
Train-epoch 0. Iteration 01100, Avg-Loss: 2.0067, Accuracy: 0.2091
Train-epoch 0. Iteration 01200, Avg-Loss: 1.9783, Accuracy: 0.2075
Train-epoch 0. Iteration 01300, Avg-Loss: 1.9500, Accuracy: 0.2115
Train-epoch 0. Iteration 01400, Avg-Loss: 1.9279, Accuracy: 0.2086
Train-epoch 0. Iteration 01500, Avg-Loss: 1.9063, Accuracy: 0.

KeyboardInterrupt: 