# Setup

In [1]:
# Mount Google Drive at /content/drive; later cells read archives, raw data and
# checkpoints from /content/drive/MyDrive/stat453.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Test set features


In [2]:
# Unpack the test2015 archive stored on Drive into the current working directory.
!tar xzf /content/drive/MyDrive/stat453/test2015.tar.gz

In [4]:
# Sanity check: count the entries that were extracted into test2015/.
!ls test2015/ | wc -l

81434


In [5]:
# Clone the openvqa repository into ./openvqa; later cells cd into it and import from it.
!git clone https://github.com/zhaoyi3264/openvqa.git

Cloning into 'openvqa'...
remote: Enumerating objects: 1566, done.
remote: Counting objects: 100% (1566/1566), done.
remote: Compressing objects: 100% (612/612), done.
remote: Total 1566 (delta 933), reused 1560 (delta 928), pack-reused 0
Receiving objects: 100% (1566/1566), 1.70 MiB | 7.74 MiB/s, done.
Resolving deltas: 100% (933/933), done.


In [6]:
!mv test2015/ openvqa/data/vqa/feats/
!mkdir openvqa/data/vqa/feats/train2014
!mkdir openvqa/data/vqa/feats/val2014

## Raw data

In [10]:
# Copy every raw JSON file from Drive into openvqa's raw-data directory.
!cp /content/drive/MyDrive/stat453/raw/*.json openvqa/data/vqa/raw/

## Pre-trained model (BUTD)



In [62]:
# Create the checkpoint directory (no error if it already exists) and copy the
# BUTD epoch-13 checkpoint from Drive into it.
!mkdir -p openvqa/ckpts/ckpt_butd/
!cp /content/drive/MyDrive/stat453/ckpts/ckpt_butd/epoch13.pkl openvqa/ckpts/ckpt_butd/

## spaCy model

In [14]:
# Install the en_vectors_web_lg 2.1.0 package from the tarball kept on Drive.
# The commented-out wget below records the upstream release URL of that tarball.
# !wget https://github.com/explosion/spacy-models/releases/download/en_vectors_web_lg-2.1.0/en_vectors_web_lg-2.1.0.tar.gz -O en_vectors_web_lg-2.1.0.tar.gz
!pip install -q /content/drive/MyDrive/stat453/en_vectors_web_lg-2.1.0.tar.gz

  Building wheel for en-vectors-web-lg (setup.py) ... done


In [None]:
# Optional, currently disabled: IPython's autoreload extension. Uncomment both
# lines to have edited modules re-imported before each cell runs.
# %load_ext autoreload
# %autoreload 2

## Source code

Upstream OpenVQA sources (pinned commit) referenced by the cells below:

- https://github.com/MILVLG/openvqa/blob/6b9bfeb2e6462b946d7e7866ffc49dd7a8bcece3/openvqa/core/base_cfgs.py
- https://github.com/MILVLG/openvqa/blob/6b9bfeb2e6462b946d7e7866ffc49dd7a8bcece3/utils/exec.py#L36
- https://github.com/MILVLG/openvqa/blob/6b9bfeb2e6462b946d7e7866ffc49dd7a8bcece3/utils/test_engine.py#L49
- https://github.com/MILVLG/openvqa/blob/6b9bfeb2e6462b946d7e7866ffc49dd7a8bcece3/openvqa/datasets/vqa/vqa_loader.py#L68

# Load dataset

In [64]:
# Work from the repository root: later cells use paths relative to it
# (configs/..., ckpts/...) and import the openvqa / utils packages from here.
%cd /content/openvqa/

/content/openvqa


In [65]:
import gc
import json

import numpy as np
import torch
import torch.utils.data as Data
import yaml
# from yaml import CLoader

from openvqa.datasets.dataset_loader import DatasetLoader, EvalLoader
from openvqa.datasets.vqa.eval.vqa import VQA
from openvqa.datasets.vqa.eval.vqaEval import VQAEval
from openvqa.models.model_loader import CfgLoader, ModelLoader
from run import create_parser
from utils.test_engine import test_engine

# Build the run configuration from CLI-style flags plus the model's YAML file.
parser = create_parser()
arg = '--RUN test --MODEL butd --DATASET vqa --NW 2 --CKPT_V butd --CKPT_E 13'
# split() with no argument tolerates repeated/trailing whitespace in the flag string.
args = parser.parse_args(arg.split())

cfg_file = "configs/{}/{}.yml".format(args.DATASET, args.MODEL)
with open(cfg_file, 'r') as f:
    # Calling yaml.load(f) without a Loader is deprecated since PyYAML 5.1 and
    # raises TypeError on PyYAML >= 6. safe_load parses plain YAML without
    # constructing arbitrary Python objects.
    yaml_dict = yaml.safe_load(f)

# Pick the config class named by the YAML's MODEL_USE entry.
__C = CfgLoader(yaml_dict['MODEL_USE']).load()
args = __C.str_to_bool(args)
args_dict = __C.parse_to_dict(args)

# Merge both sources; on a key clash the command-line value wins over the YAML value.
args_dict = {**yaml_dict, **args_dict}
__C.add_args(args_dict)
__C.proc()

print('Hyper Parameters:')
print(__C)

Checking dataset ........
Finished!

Hyper Parameters:
{ BATCH_SIZE        }->512
{ BBOX_NORMALIZE    }->False
{ CACHE_PATH        }->./results/cache
{ CKPTS_PATH        }->./ckpts
{ CKPT_EPOCH        }->13
{ CKPT_PATH         }->None
{ CKPT_VERSION      }->butd
{ CLASSIFER_DROPOUT_R }->0.5
{ DATASET           }->vqa
{ DATA_PATH         }->{'vqa': './data/vqa', 'gqa': './data/gqa', 'clevr': './data/clevr'}
{ DATA_ROOT         }->./data
{ DEVICES           }->[0]
{ DROPOUT_R         }->0.2
{ EVAL_BATCH_SIZE   }->256
{ EVAL_EVERY_EPOCH  }->False
{ FEATS_PATH        }->{'vqa': {'train': './data/vqa/feats/train2014', 'val': './data/vqa/feats/val2014', 'test': './data/vqa/feats/test2015'}, 'gqa': {'default-frcn': './data/gqa/feats/gqa-frcn', 'default-grid': './data/gqa/feats/gqa-grid'}, 'clevr': {'train': './data/clevr/feats/train', 'val': './data/clevr/feats/val', 'test': './data/clevr/feats/test'}}
{ FEAT_SIZE         }->{'vqa': {'FRCN_FEAT_SIZE': (100, 2048), 'BBOX_FEAT_SIZE': (100, 5)},

In [3]:
# Instantiate the dataset selected by the configuration.
dataset = DatasetLoader(__C).DataSet()

# Pull out the dataset attributes that the model-building and prediction cells read.
data_size, token_size, ans_size, pretrained_emb = (
    dataset.data_size,
    dataset.token_size,
    dataset.ans_size,
    dataset.pretrained_emb,
)

Finished!



In [66]:
# Build the network selected by the config, move it to the GPU and put it in
# evaluation mode.
net = ModelLoader(__C).Net(
    __C,
    pretrained_emb,
    token_size,
    ans_size
)

net.cuda()
net.eval()

# Derive the checkpoint location from the config rather than hard-coding it, so
# it always agrees with the --CKPT_V / --CKPT_E flags. With the current flags
# this resolves to ./ckpts/ckpt_butd/epoch13.pkl.
ckpt_path = '{}/ckpt_{}/epoch{}.pkl'.format(
    __C.CKPTS_PATH, __C.CKPT_VERSION, __C.CKPT_EPOCH
)
with open(ckpt_path, 'rb') as f:
    state = torch.load(f)
# Last expression of the cell: shows the key-matching report from load_state_dict.
net.load_state_dict(state['state_dict'])

<All keys matched successfully>

In [88]:
# Batched, in-order iteration over the dataset; shuffle stays off so predictions
# line up with the dataset's own ordering.
dataloader = Data.DataLoader(
    dataset=dataset,
    num_workers=__C.NUM_WORKERS,
    pin_memory=__C.PIN_MEM,
    shuffle=False,
    batch_size=__C.EVAL_BATCH_SIZE,
)

# Prediction (on test set)

In [97]:
%%time
atts = []
ans_ix_list = []
pred_list = []

for step, (frcn_feat, grid_feat, bbox_feat, ques_ix, ans) in enumerate(dataloader):
    print("\rEvaluation: [step %4d/%4d]" % (
        step,
        int(data_size / __C.EVAL_BATCH_SIZE),
    ), end='          ')

    frcn_feat = frcn_feat.cuda()
    grid_feat = grid_feat.cuda()
    bbox_feat = bbox_feat.cuda()
    ques_ix = ques_ix.cuda()

    lang_feat = net.embedding(ques_ix)
    lang_feat, _ = net.rnn(lang_feat)

    img_feat, _ = net.adapter(frcn_feat, grid_feat, bbox_feat)

    q, v = lang_feat[:, -1], img_feat

    att = net.backbone.v_att(q, v)
    atts.append(att.squeeze(-1).cpu().detach().numpy())
    atted_v = (att * v).sum(1)
    q_repr = net.backbone.q_net(q)
    v_repr = net.backbone.v_net(atted_v)
    joint_repr = q_repr * v_repr
    pred = net.classifer(joint_repr)

    pred_np = pred.cpu().data.numpy()
    pred_argmax = np.argmax(pred_np, axis=1)
    
    if pred_argmax.shape[0] != __C.EVAL_BATCH_SIZE:
        pred_argmax = np.pad(
            pred_argmax,
            (0, __C.EVAL_BATCH_SIZE - pred_argmax.shape[0]),
            mode='constant',
            constant_values=-1
        )

    ans_ix_list.append(pred_argmax)
    
    if __C.TEST_SAVE_PRED:
        if pred_np.shape[0] != __C.EVAL_BATCH_SIZE:
            pred_np = np.pad(
                pred_np,
                ((0, __C.EVAL_BATCH_SIZE - pred_np.shape[0]), (0, 0)),
                mode='constant',
                constant_values=-1
            )

        pred_list.append(pred_np)

# Evaluation: [step 1749/1749]
# CPU times: user 4min 39s, sys: 3min 56s, total: 8min 36s
# Wall time: 34min 6s

Evaluation: [step 1749/1749]          CPU times: user 4min 39s, sys: 3min 56s, total: 8min 36s
Wall time: 34min 6s


In [98]:
# Flatten the per-batch index arrays into a single 1-D array (padding included).
ans_ix_list = np.array(ans_ix_list).reshape(-1)

qid_list = [entry['question_id'] for entry in dataset.ques_list]
ans_size = dataset.ans_size

# One record per question: the predicted answer string and the question id.
# Only the first len(qid_list) predictions are read, so padded slots are ignored.
result = [
    {
        'answer': dataset.ix_to_ans[str(ans_ix_list[position])],
        'question_id': int(question_id),
    }
    for position, question_id in enumerate(qid_list)
]

log_file = __C.LOG_PATH + '/log_run_' + __C.VERSION + '.txt'
result_eval_file = __C.CACHE_PATH + '/result_run_' + __C.VERSION + '.json'
print('Save the result to file: {}'.format(result_eval_file))
with open(result_eval_file, 'w') as f:
    json.dump(result, f)

Save the result to file: ./results/cache/result_run_2205634.json


In [99]:
# Stack the per-batch attention arrays along the sample axis into one array.
res = np.concatenate(atts, axis=0)
res.shape

(447793, 100)

In [100]:
# In-memory size of the attention array in MiB.
res.nbytes / (1024 * 1024)

170.81947326660156

In [101]:
# Persist the attention array under the key 'att' in a compressed archive
# (numpy appends the .npz extension, giving att_weight_butd.npz).
np.savez_compressed('att_weight_butd', att=res)

In [102]:
# Round-trip check: reload the archive and show the stored array's shape.
# np.load on an .npz returns an NpzFile that holds the file open; using it as a
# context manager closes it as soon as the shape has been read.
with np.load('att_weight_butd.npz') as temp:
    saved_att_shape = temp['att'].shape
saved_att_shape

(447793, 100)