In [1]:
import os

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchsummaryX import summary
from torchvision import transforms

from dataset import PhoenixFrame

## Loading Dataset

In [2]:
BSZ = 100          # batch size shared by all three loaders
NUM_WORKERS = 32   # DataLoader worker processes per loader
SEED = 0           # fixes the random train/dev/test partition

# Seed the global RNG *before* splitting: random_split draws its permutation
# from it, so without a seed every kernel restart produces a different test
# set and a previously saved checkpoint may be evaluated on frames it was
# trained on.
torch.manual_seed(SEED)

transform = transforms.Compose([transforms.RandomResizedCrop(224),
                                transforms.ToTensor()])

root = '/mnt/data/public/datasets'
dataset = PhoenixFrame(root, transform)
# NOTE(review): the three subsets below all wrap this single dataset object,
# so the random-crop transform is presumably applied to dev/test frames as
# well -- confirm against PhoenixFrame and consider a deterministic eval
# transform.

# 80% / 10% / remainder, so the three sizes always sum to len(dataset).
train_size = int(len(dataset) * 0.8)
dev_size = int(len(dataset) * 0.1)
test_size = len(dataset) - train_size - dev_size
# Split into three subsets: train / dev / test.
train_set, dev_set, test_set = torch.utils.data.random_split(
    dataset, (train_size, dev_size, test_size))

# Only the training data needs reshuffling each epoch; the evaluation loaders
# iterate in a fixed order so their batches are the same on every run.
train_loader = DataLoader(
    train_set, batch_size=BSZ, shuffle=True,
    num_workers=NUM_WORKERS, pin_memory=True)
dev_loader = DataLoader(
    dev_set, batch_size=BSZ, shuffle=False,
    num_workers=NUM_WORKERS, pin_memory=True)
test_loader = DataLoader(
    test_set, batch_size=BSZ, shuffle=False,
    num_workers=NUM_WORKERS, pin_memory=True)

In [3]:
%%time

print(len(train_loader))
print(len(dev_loader))
print(len(test_loader))

# for batch_idx, batch in enumerate(test_loader):
#     print(batch['frame'].shape)
#     print(batch['target'])
    
#     if batch_idx == 0:
#         break
    

6392
799
799
CPU times: user 389 µs, sys: 186 µs, total: 575 µs
Wall time: 369 µs


## Define Model

In [4]:
# Learning rate handed to Adam below.
LR = 1e-4

# Use the first CUDA device when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Start from a ResNet-18 with pretrained weights.
model = torchvision.models.resnet18(pretrained=True)

# Uncomment to inspect the architecture / per-layer summary:
# print(model)
# summary(model, torch.ones(1,3,224,224))

# Swap the final fully connected layer for a fresh 1232-way linear head
# (presumably the number of target classes in PhoenixFrame -- TODO confirm),
# then move the whole network to the selected device.
in_features = model.fc.in_features
model.fc = nn.Linear(in_features=in_features, out_features=1232)
model = model.to(device)

# Loss, optimizer (over all parameters, including the new head) and the
# TensorBoard writer used by the train/val functions.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LR)
writer = SummaryWriter(log_dir='./log/test')

## Define Train

In [5]:
def _log_train_window(writer, loss_sum, acc_sum, count, step):
    """Write the mean loss/accuracy of ``count`` batches at global ``step``."""
    if writer and count:
        writer.add_scalar('train loss', loss_sum / count, step)
        writer.add_scalar('train acc', acc_sum / count, step)


def train(model, train_loader, device, criterion, optimizer, epoch, writer,
          log_interval=500):
    """Run one training epoch and log windowed loss/accuracy.

    Args:
        model: Module to optimise; it is switched to training mode here.
        train_loader: Sized iterable (``len()`` is used to compute the global
            step) yielding dict batches with a ``'frame'`` input tensor and a
            ``'target'`` tensor.
        device: Device the batch tensors are moved to before the forward pass.
        criterion: Callable ``criterion(outputs, targets)`` returning a scalar
            loss tensor.
        optimizer: Optimizer that updates ``model``'s parameters.
        epoch: Epoch index; only used to compute the global logging step
            ``epoch * len(train_loader) + batch_idx``.
        writer: Object exposing ``add_scalar(tag, value, step)``. A falsy
            value (e.g. ``None``) disables logging, mirroring ``val``.
        log_interval: Number of batches averaged into each logged point.
            Defaults to 500, the interval that was previously hard-coded.

    Raises:
        ValueError: If ``log_interval`` is smaller than 1.
    """
    if log_interval < 1:
        raise ValueError(f'log_interval must be >= 1, got {log_interval}')

    model.train()
    num_batches = len(train_loader)
    running_loss = 0.0
    running_acc = 0.0
    pending = 0  # batches accumulated since the last logged point

    for batch_idx, batch in enumerate(train_loader):
        inputs = batch['frame'].to(device)
        # Targets are moved with .to(device), so they must already be a
        # tensor when they leave the loader.
        targets = batch['target'].to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # Index of the largest logit along the last axis is the prediction.
        predictions = outputs.max(-1)[1]
        running_acc += predictions.eq(targets).sum().item() / len(targets)
        running_loss += loss.item()
        pending += 1

        if pending == log_interval:
            _log_train_window(writer, running_loss, running_acc, pending,
                              epoch * num_batches + batch_idx)
            running_loss = 0.0
            running_acc = 0.0
            pending = 0

    # Flush the tail of the epoch: without this, the last
    # ``num_batches % log_interval`` batches would never be reported (and
    # nothing at all would be logged when an epoch is shorter than one
    # interval).
    if pending:
        _log_train_window(writer, running_loss, running_acc, pending,
                          epoch * num_batches + batch_idx)

## Define val

In [None]:
def val(model, dev_loader, device, criterion, epoch, writer):
    """Evaluate ``model`` on ``dev_loader`` and return per-sample averages.

    Loss and accuracy are aggregated over samples rather than over batches,
    so a smaller final batch is not over-weighted. When all batches have the
    same size the result equals the mean of the per-batch values.

    Args:
        model: Module to evaluate; it is switched to eval mode here.
        dev_loader: Iterable yielding dict batches with a ``'frame'`` input
            tensor and a ``'target'`` tensor.
        device: Device the batch tensors are moved to.
        criterion: Callable ``criterion(outputs, targets)`` returning a scalar
            loss tensor. Assumed to be mean-reduced over the batch (the
            ``nn.CrossEntropyLoss`` default) -- the per-sample weighting of
            the loss relies on that.
        epoch: Step value under which the metrics are logged.
        writer: Object exposing ``add_scalar(tag, value, step)``; a falsy
            value (e.g. ``None``) disables logging.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)`` of Python floats.

    Raises:
        ZeroDivisionError: If ``dev_loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    num_correct = 0
    num_samples = 0
    with torch.no_grad():
        for batch in dev_loader:
            inputs = batch['frame'].to(device)
            # Targets are moved with .to(device), so they must already be a
            # tensor when they leave the loader.
            targets = batch['target'].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            batch_size = len(targets)
            # Undo the batch mean so every sample carries the same weight.
            loss_sum += loss.item() * batch_size
            # Index of the largest logit along the last axis is the prediction.
            num_correct += outputs.max(-1)[1].eq(targets).sum().item()
            num_samples += batch_size

    epoch_loss = loss_sum / num_samples
    epoch_acc = num_correct / num_samples
    if writer:
        writer.add_scalar('dev loss', epoch_loss, epoch)
        writer.add_scalar('dev acc', epoch_acc, epoch)
    return epoch_loss, epoch_acc
    

## Train and eval

In [None]:
nEpoch = 50
best_val_acc = 0.0
for epoch in range(nEpoch):
    %time train(model, train_loader, device, criterion, optimizer, epoch, writer)
    val_loss, val_acc = val(model, dev_loader, device, criterion, epoch, writer)
    print(f'epoch:{epoch} | val loss:{val_loss} | val acc:{val_acc}')
    
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model, './save/test.pth')
        print(f'best model saved with val acc: {val_acc} in epoch {epoch}')
    

## Load and test

In [None]:
# %%time
# device = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')
# criterion = nn.CrossEntropyLoss()
# model_ft = torch.load('./save/Res18_Pretrained_Phoenix_FrameWise.pth').to(device)
# test_loss, test_acc = val(model_ft, test_loader, device, criterion, epoch=0, writer=None)
# print(f'test loss:{test_loss} | test acc:{test_acc}')
#
# The lines above are a disabled evaluation of a saved model on test_loader;
# uncomment them to run it (writer=None makes val() skip TensorBoard logging).
# NOTE(review): the checkpoint path here is not './save/test.pth', the file the
# training cell writes, and the device is 'cuda:2' rather than the 'cuda:0'
# used for training -- adjust both before re-enabling.

Observed resource usage for different DataLoader settings:

- batch_size: 100, num_workers: 32, pin_memory: True -> memory: 3753MB, gpu_utils: 0%-100%, cpu: 45%
- batch_size: 40, num_workers: 20, pin_memory: True -> memory: 1941MB, gpu_utils: 0%-100%, cpu: 92%
- batch_size: 20, num_workers: 20, pin_memory: True -> memory: 1359MB, gpu_utils: 80%-100%, cpu: 101%
- batch_size: 10, num_workers: 20, pin_memory: True -> memory: 1055MB, gpu_utils: 60%-80%, cpu: 110%
- batch_size: 10, num_workers: 10, pin_memory: True -> memory: 1055MB, gpu_utils: 0%-90%, cpu: 94%
- batch_size: 10, num_workers: 5, pin_memory: True -> memory: 1055MB, gpu_utils: 0%-70%, cpu: 74.7%
- batch_size: 5, num_workers: 10, pin_memory: True -> memory: 945MB, gpu_utils: 30%-60%
- batch_size: 1, num_workers: 10, pin_memory: True -> memory: 831MB, gpu_utils: 30%-40%, cpu: 108%
- batch_size: 5, num_workers: 5, pin_memory: True -> memory: 945MB, gpu_utils: 40%-60%, cpu: 107%
- batch_size: 1, num_workers: 5, pin_memory: True -> memory: 831MB, gpu_utils: 30%-40%, cpu: 116%
