# Finetune the officially trained model

First download the pretrained model from [here](https://github.com/Cyanogenoid/pytorch-vqa/releases/tag/v1.0).

Then move it to `logs/`.

In [1]:
import sys
import os.path
import math
import json

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.backends.cudnn as cudnn
from tqdm import tqdm

import config
import data
import model
import utils


def update_learning_rate(optimizer, iteration):
    lr = config.initial_lr * 0.5**(float(iteration) / config.lr_halflife)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


total_iterations = 0


def run(net, loader, optimizer, tracker, train=False, prefix='', epoch=0):
    """ Run an epoch over the given loader """
    if train:
        net.train()
        tracker_class, tracker_params = tracker.MovingMeanMonitor, {'momentum': 0.99}
    else:
        net.eval()
        tracker_class, tracker_params = tracker.MeanMonitor, {}
        answ = []
        idxs = []
        accs = []

    tq = tqdm(loader, desc='{} E{:03d}'.format(prefix, epoch), ncols=0)
    loss_tracker = tracker.track('{}_loss'.format(prefix), tracker_class(**tracker_params))
    acc_tracker = tracker.track('{}_acc'.format(prefix), tracker_class(**tracker_params))

    log_softmax = nn.LogSoftmax(dim=0).cuda()
    for v, q, a, idx, q_len in tq:
        var_params = {
            # 'volatile': not train,
            'requires_grad': False,
        }
        v = Variable(v.cuda(non_blocking=True), **var_params)
        q = Variable(q.cuda(non_blocking=True), **var_params)
        a = Variable(a.cuda(non_blocking=True), **var_params)
        q_len = Variable(q_len.cuda(non_blocking=True), **var_params)

        out = net(v, q, q_len)
        nll = -log_softmax(out)
        loss = (nll * a / 10).sum(dim=1).mean()
        acc = utils.batch_accuracy(out.data, a.data).cpu()

        if train:
            global total_iterations
            update_learning_rate(optimizer, total_iterations)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_iterations += 1
        else:
            # store information about evaluation of this minibatch
            _, answer = out.data.cpu().max(dim=1)
            answ.append(answer.view(-1))
            accs.append(acc.view(-1))
            idxs.append(idx.view(-1).clone())
        
        loss_tracker.append(loss.item())
        # acc_tracker.append(acc.mean())
        for a in acc:
            acc_tracker.append(a.item())
        fmt = '{:.4f}'.format
        tq.set_postfix(loss=fmt(loss_tracker.mean.value), acc=fmt(acc_tracker.mean.value))

    if not train:
        answ = list(torch.cat(answ, dim=0))
        accs = list(torch.cat(accs, dim=0))
        idxs = list(torch.cat(idxs, dim=0))
        return answ, accs, idxs

In [10]:
from datetime import datetime
name = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
target_name = os.path.join('logs', '{}.pth'.format(name))
print('will save to {}'.format(target_name))

cudnn.benchmark = True

train_loader = data.get_loader(train=True)
val_loader = data.get_loader(val=True)

# net = nn.DataParallel(model.Net(train_loader.dataset.num_tokens)).cuda()
# inner_net = net.module

# Load the official pretrained model
log = torch.load('logs/2017-08-04_00.55.19.pth')
tokens = len(log['vocab']['question']) + 1
net = nn.DataParallel(model.Net(tokens)).cuda()
inner_net = net.module
inner_net.classifier = model.Classifier(
    in_features=2 * config.output_features + 1024,
    mid_features=1024,
    out_features=3000, # replaced for loading
    drop=0.5,
)

net.load_state_dict(log['weights']) # load!

# No need autograd
for param in inner_net.parameters():
    inner_net.requires_grad = False
# Replace the textprocessor layer
inner_net.text = model.TextProcessor(
    embedding_tokens=train_loader.dataset.num_tokens,
    embedding_features=300,
    lstm_features=1024,
    drop=0.5,
)
inner_net.text.cuda()
# Replace the classifier layer
inner_net.classifier = model.Classifier(
    in_features=2 * config.output_features + 1024,
    mid_features=1024,
    out_features=config.max_answers,
    drop=0.5,
)
inner_net.classifier.cuda()

optimizer = optim.Adam([p for p in net.parameters() if p.requires_grad])

tracker = utils.Tracker()
config_as_dict = {k: v for k, v in vars(config).items() if not k.startswith('__')}

will save to logs/2021-07-10_23:20:00.pth


In [11]:
for i in range(config.epochs):
    _ = run(net, train_loader, optimizer, tracker, train=True, prefix='train', epoch=i)
    r = run(net, val_loader, optimizer, tracker, train=False, prefix='val', epoch=i)

    results = {
        'name': name,
        'tracker': tracker.to_dict(),
        'config': config_as_dict,
        'weights': net.state_dict(),
        'eval': {
            'answers': r[0],
            'accuracies': r[1],
            'idx': r[2],
        },
        'vocab': train_loader.dataset.vocab,
    }
    torch.save(results, target_name)

train E000: 100% 164/164 [00:27<00:00,  5.95it/s, acc=0.2698, loss=3.4048]
val E000: 100% 168/168 [00:20<00:00,  8.11it/s, acc=0.2690, loss=2.6996]
train E001: 100% 164/164 [00:22<00:00,  7.38it/s, acc=0.2072, loss=2.9109]
val E001: 100% 168/168 [00:21<00:00,  7.89it/s, acc=0.1124, loss=2.5225]
train E002: 100% 164/164 [00:22<00:00,  7.25it/s, acc=0.3612, loss=2.7491]
val E002: 100% 168/168 [00:19<00:00,  8.43it/s, acc=0.3152, loss=2.4384]
train E003: 100% 164/164 [00:20<00:00,  8.01it/s, acc=0.4153, loss=2.6307]
val E003: 100% 168/168 [00:21<00:00,  7.83it/s, acc=0.3268, loss=2.3994]
train E004: 100% 164/164 [00:21<00:00,  7.70it/s, acc=0.4409, loss=2.6010]
val E004: 100% 168/168 [00:19<00:00,  8.51it/s, acc=0.3278, loss=2.3864]
train E005: 100% 164/164 [00:22<00:00,  7.43it/s, acc=0.4285, loss=2.5653]
val E005: 100% 168/168 [00:21<00:00,  7.84it/s, acc=0.3299, loss=2.3693]
train E006: 100% 164/164 [00:22<00:00,  7.26it/s, acc=0.4922, loss=2.5296]
val E006: 100% 168/168 [00:21<00:00, 