In [1]:
import torch
import torchvision
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from torchsummaryX import summary
import torch.optim as optim
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

import os
import warnings
warnings.filterwarnings("ignore")
from dataset import CSL_Isolated

## Loading data

In [2]:
transform = transforms.Compose([transforms.Resize([128, 128]),
                                transforms.ToTensor(),
                                transforms.Normalize(mean=[0.5], std=[0.5])])

BSZ = 16
data_path = "/home/haodong/Data/CSL_Isolated/color_video_125000"
label_path = "/home/haodong/Data/CSL_Isolated/dictionary.txt"
train_set = CSL_Isolated(data_path, label_path, train=True, transform=transform)
test_set = CSL_Isolated(data_path, label_path, train=False, transform=transform)
train_loader = DataLoader(train_set, batch_size=BSZ, shuffle=True, num_workers=10)
test_loader = DataLoader(test_set, batch_size=BSZ, shuffle=True, num_workers=10)

In [3]:
# BSZ = 16

# transform = transforms.Compose([utils.RandomResizedCropVideo(112), utils.ToTensorVideo()])

# data_root = '/mnt/data/public/datasets/HMDB'

# train_loader = DataLoader(datasets.HMDB51(root = os.path.join(data_root, 'hmdb51'), 
#                                           annotation_path = os.path.join(data_root,'splits'),
#                                           frames_per_clip = 8, fold = 1, train = True, 
#                                           transform = transform), 
#                           batch_size = BSZ, shuffle = True, num_workers = 4)

# val_loader = DataLoader(datasets.HMDB51(root = os.path.join(data_root, 'hmdb51'), 
#                                         annotation_path = os.path.join(data_root, 'splits'),
#                                         frames_per_clip = 8, fold = 2, train = False, 
#                                         transform = transform), 
#                         batch_size = BSZ, shuffle = True, num_workers = 4)

# test_loader = DataLoader(datasets.HMDB51(root = os.path.join(data_root, 'hmdb51'), 
#                                          annotation_path = os.path.join(data_root, 'splits'),
#                                          frames_per_clip = 8, fold = 3, train = False, 
#                                          transform = transform), 
#                          batch_size = BSZ, shuffle = True, num_workers = 4)



# input_size = next(iter(train_loader))[0].size()
# assert input_size == torch.Size([BSZ, 3, 8, 112, 112]), 'input_size is {}'.format(input_size)

In [4]:
print('train loader', len(train_loader))
print('test loader', len(test_loader))

train loader 6250
test loader 1563


## Define model 

In [5]:
LR = 1e-4
NEPOCH = 10
device = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')

model = torchvision.models.video.r3d_18(pretrained=True)
inp_feature = model.fc.in_features
model.fc = nn.Linear(inp_feature, 500)
model = model.to(device)

criterion =  nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = LR)
path = path = f'bsz:{BSZ}-lr:{LR}-nepoch:{NEPOCH}'
writer = SummaryWriter(os.path.join('./log', path))

## Define train

In [6]:
# train acc >1 ?
def train(model, train_loader, criterion, optimizer, epoch, writer, device):
    model.train()
    running_loss = 0.0
    running_accuracy = 0.0
    for batch_idx, batch in enumerate(train_loader):
        inputs = batch['data'].to(device)
        target = batch['label'].to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, target.view(-1))
        loss.backward()
        optimizer.step()

        predict = outputs.max(1)[1]
        running_accuracy += predict.eq(target).sum().item() / len(target)
        running_loss += loss.item()

        if batch_idx % 600 == 599:
            writer.add_scalar('train loss',
                              running_loss / 600,
                              epoch * len(train_loader) + batch_idx)
            writer.add_scalar('train accuracy',
                              running_accuracy / 600,
                              epoch * len(train_loader) + batch_idx)

            running_loss = 0.0
            running_accuracy = 0.0

## Define val

In [7]:
def evaluate(model, val_loader, criterion, device):
    model.eval()
    epoch_loss = 0.0
    epoch_accuracy = 0.0
    with torch.no_grad():
        for batch_idx, batch in enumerate(val_loader):
            inputs = batch['data'].to(device)
            target = batch['label'].to(device)
            outputs = model(inputs)

            epoch_loss += criterion(outputs, target.view(-1)).item()
            predict = outputs.max(1)[1]
            epoch_accuracy += predict.eq(target).sum().item() / len(target)
            
    return epoch_loss / len(val_loader), epoch_accuracy / len(val_loader)

## Train and save

In [8]:
best_val_acc = 0.0
for epoch in range(NEPOCH):
    %time train(model, train_loader, criterion, optimizer, epoch, writer, device)
    val_loss, val_acc = evaluate(model, test_loader, criterion, device)
    print(f'epoch {epoch} | val loss {val_loss} | val acc {val_acc}')

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model, './save/res3d18.pth')
        

## Load and test

In [9]:
%%time
model_ft = torch.load('./save/res3d18.pth')
test_loss, test_acc = evaluate(model_ft, test_loader, criterion, device)
print(f'test loss {test_loss} | test acc {test_acc}')

0 0.6589291095733643 0.75
test loss 0.0004215797246150763 | test acc 0.0004798464491362764
CPU times: user 376 ms, sys: 769 ms, total: 1.14 s
Wall time: 13.3 s
