ResNet 3D CNN + Kinetics-400 pretraining + HMDB51 training  
Reference paper: Can Spatiotemporal 3D CNNs Retrace the History of 2D CNNs and ImageNet?

In [3]:
import torch
import torchvision
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from torchsummaryX import summary
import torch.optim as optim
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

import pretrained_model as model
import os
import time
import copy
import warnings
warnings.filterwarnings("ignore")
import utils

## Loading data

In [4]:
BSZ = 16

# The same clip transform (built from the project-local `utils` helpers) is
# applied to the training, validation and test clips.
# NOTE(review): a random crop at evaluation time makes val/test scores
# non-deterministic -- confirm this is intended.
transform = transforms.Compose([utils.RandomResizedCropVideo(112), utils.ToTensorVideo()])

data_root = '/mnt/data/public/datasets/HMDB'


def _hmdb51_loader(fold, train):
    """Return a shuffled DataLoader of 8-frame HMDB51 clips.

    Args:
        fold: HMDB51 annotation fold passed through to ``datasets.HMDB51``.
        train: selects the train (True) or test (False) partition of that fold.
    """
    clips = datasets.HMDB51(
        root=os.path.join(data_root, 'hmdb51'),
        annotation_path=os.path.join(data_root, 'splits'),
        frames_per_clip=8,
        fold=fold,
        train=train,
        transform=transform,
    )
    return DataLoader(clips, batch_size=BSZ, shuffle=True, num_workers=4)


# NOTE(review): validation and test use the `train=False` partitions of folds
# 2 and 3 while training uses fold 1. The HMDB51 folds are presumably
# alternative splits of the same videos, so these partitions may overlap with
# the fold-1 training set -- confirm against the split files.
train_loader = _hmdb51_loader(fold=1, train=True)
val_loader = _hmdb51_loader(fold=2, train=False)
test_loader = _hmdb51_loader(fold=3, train=False)

# Sanity check: pull one training batch and verify its shape is
# (batch, 3, 8, 112, 112) before building the model around it.
input_size = next(iter(train_loader))[0].size()
assert input_size == torch.Size([BSZ, 3, 8, 112, 112]), f'input_size is {input_size}'

HBox(children=(FloatProgress(value=0.0, max=423.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=423.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=423.0), HTML(value='')))




In [10]:
# Report the number of batches each loader yields per pass.
for label, loader in (('train loader', train_loader),
                      ('val loader', val_loader),
                      ('test loader', test_loader)):
    print(label, len(loader))

train loader 19998
val loader 8655
test loader 8574


## Define model 

In [5]:
# Optimisation hyper-parameters.
LR = 1e-4
# Number of training epochs. It must be defined in this cell because the
# TensorBoard run directory name below embeds it; previously it was only
# assigned in the training cell, so running the notebook top-to-bottom raised
# NameError here.
NEPOCH = 10

# Use the third GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')

# `model.get_model()` comes from the project-local `pretrained_model` module.
model_ft = model.get_model().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_ft.parameters(), lr=LR)

# One TensorBoard run directory per hyper-parameter combination.
path = f'bsz:{BSZ}-lr:{LR}-nepoch:{NEPOCH}'
writer = SummaryWriter(os.path.join('./log', path))

In [6]:
# print(model_ft)
# summary(model_ft, torch.zeros(1, 3, 8, 112, 112).to(device))

# classes = ['brush_hair', 'eat', 'kiss', 'shake_hands', 'sword',
#            'cartwheel', 'fall_floor', 'laugh', 'shoot_ball', 'sword_exercise',
#            'catch', 'fencing', 'pick', 'shoot_bow', 'talk',
#            'chew', 'flic_flac', 'pour', 'shoot_gun', 'throw',
#            'clap', 'golf', 'pullup', 'sit', 'turn',
#            'climb', 'handstand', 'punch', 'situp', 'walk',
#            'climb_stairs', 'hit', 'push', 'smile', 'wave',
#            'dive', 'hug', 'pushup', 'smoke', 'draw_sword',
#            'jump', 'ride_bike', 'somersault', 'dribble', 'kick',
#            'ride_horse', 'stand', 'drink', 'kick_ball', 'run', 'swing_baseball']

## Train and test

In [9]:
NEPOCH = 10

# Keep a snapshot of the weights with the highest validation accuracy so far;
# it starts as a copy of the current (pre-training) weights.
best_val_acc = 0.0
best_val_model = copy.deepcopy(model_ft.state_dict())

for epoch in range(NEPOCH):
    # `model.train` / `model.evaluate` are helpers from the project-local
    # `pretrained_model` module (presumably one training epoch, then a
    # validation pass -- confirm against that module).
    model.train(model_ft, train_loader, criterion, optimizer, epoch, writer, device)
    val_loss, val_acc = model.evaluate(model_ft, val_loader, criterion, device)
    print(f'epoch {epoch} | val loss {val_loss} | val acc {val_acc}')

    # Only a strict improvement replaces the stored snapshot.
    if not val_acc > best_val_acc:
        continue
    best_val_acc = val_acc
    best_val_model = copy.deepcopy(model_ft.state_dict())

# Restore the best-validation weights before scoring on the test loader.
model_ft.load_state_dict(best_val_model)
test_loss, test_acc = model.evaluate(model_ft, test_loader, criterion, device)
print(f'test loss {test_loss} | test acc {test_acc}')

KeyboardInterrupt: 

## Save best model

## Add hyper parameters and final results to tensorboard

In [57]:
# Record the run configuration and the final metrics in TensorBoard's
# HParams view, using the same SummaryWriter as the training run.
hparam_dict = {'train size': len(train_loader.dataset),
               'validation size': len(val_loader.dataset),
               'test size': len(test_loader.dataset),
               'batch number per epoch': len(train_loader),
               'batch size': train_loader.batch_size,
               # Fixed: the epoch-count constant is NEPOCH; `EPOCH` is not
               # defined anywhere and raised NameError.
               'epoch number': NEPOCH,
               # Stored as a string so the torch.Size is logged as text.
               'input shape': str(input_size)}
metric_dict = {'hparam/best validation accuracy': best_val_acc,
               'hparam/test accuracy': test_acc}
writer.add_hparams(hparam_dict, metric_dict)
# Push pending events to disk so the run shows up without closing the writer.
writer.flush()