# ResNet for captioner

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os, sys
import json
import argparse
from random import shuffle, seed
import string
# non-standard dependencies:
import h5py
from six.moves import cPickle
import numpy as np
import torch
import torchvision.models as models
from torch.autograd import Variable
import skimage.io

from torchvision import transforms as trn

from sklearn.decomposition import PCA

# Tensor-level preprocessing applied to every image before it is fed to the
# ResNet. ToTensor is left disabled because compute_img_feat already scales the
# pixels to [0, 1] and reorders the array to channel-first itself, so only the
# per-channel normalization remains here.
# NOTE(review): the mean/std values match the ImageNet statistics documented
# for torchvision's pretrained models -- confirm they suit the loaded weights.
preprocess = trn.Compose([
    # trn.ToTensor(),
    trn.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


from sys import path
# Put the current working directory first on the import path so the
# `captioning` package imported below is looked up there before anywhere else.
sys.path.insert(0, os.getcwd())

from captioning.utils.resnet_utils import myResnet
import captioning.utils.resnet as resnet

In [2]:
def compute_img_feat(img_name, im_path, my_resnet, att_size):
    """Extract the fc and attention features of a single image.

    Args:
        img_name: File name of the image, relative to ``im_path``.
        im_path: Directory that contains the image.
        my_resnet: Feature extractor, called as ``my_resnet(image, att_size)``
            and expected to return an ``(fc, att)`` pair of tensors.
        att_size: Forwarded unchanged to ``my_resnet``.

    Returns:
        A tuple ``(fc, att)`` of float32 NumPy arrays living in host memory.
    """
    image = skimage.io.imread(os.path.join(im_path, img_name))

    # Grayscale images come back 2-D; replicate the single channel three
    # times so the rest of the pipeline always sees an H x W x 3 array.
    if image.ndim == 2:
        image = np.stack((image, image, image), axis=2)

    # Scale to [0, 1] and switch to channel-first layout by hand (this is the
    # part ToTensor would normally do; `preprocess` only normalizes).
    image = image.astype('float32') / 255.0
    tensor = torch.from_numpy(image.transpose([2, 0, 1]))
    if torch.cuda.is_available():
        tensor = tensor.cuda()

    # The whole inference path, including the network forward pass, runs
    # without autograd so no graph is recorded for each image.
    with torch.no_grad():
        fc, att = my_resnet(preprocess(tensor), att_size)

    return (fc.detach().cpu().float().numpy(),
            att.detach().cpu().float().numpy())

def make_dir_if_not_there(d):
    """Create directory ``d`` (and any missing parents) unless it exists.

    Args:
        d: Path of the directory to ensure.

    Raises:
        OSError: If the directory cannot be created and still does not exist
            afterwards (e.g. the path is occupied by a regular file or the
            parent is not writable).
    """
    if os.path.isdir(d):
        return
    try:
        # makedirs also handles nested paths such as "out/features".
        os.makedirs(d)
    except OSError:
        # Another process may have created it between the check and the
        # call; only propagate the error if the directory is really missing.
        if not os.path.isdir(d):
            raise

def main(is_relative = 'True',
    att_size = 7,
    model = 'resnet101',
    model_root = 'imagenet_weights',
    output_dir = 'image_features',
    dataset_root = 'datasets/amazon_dresses/',
    image_dir = 'images_resized',
    split = 'dresses_docnos.json'):
    """Extract ResNet features for every image listed in a split file.

    Loads the requested ResNet with weights from ``model_root``, runs each
    image named in the split through it, and writes the collected features to
    ``output_dir`` as two compressed NumPy archives (``fc_feature`` and
    ``att_feature``, each holding one array under the key ``feat``).

    Args:
        is_relative: Unused; kept so existing callers keep working.
        att_size: Forwarded to ``compute_img_feat`` for every image.
        model: Name of the constructor to look up in the ``resnet`` module;
            also the stem of the weight file (``<model>.pth``).
        model_root: Directory containing the weight file.
        output_dir: Directory the feature archives are written to. It is
            created up front if it does not exist.
        dataset_root: Base directory of the dataset.
        image_dir: Image directory, relative to ``dataset_root``.
        split: JSON file, relative to ``dataset_root``, holding the sequence
            of image ids (file names without the ``.jpg`` extension).
    """
    # os.path.join keeps the default (trailing-slash) root working exactly as
    # before and additionally tolerates a root given without a trailing slash.
    image_root = os.path.join(dataset_root, image_dir)
    split_path = os.path.join(dataset_root, split)

    # Fail early on an unusable output location instead of after every image
    # has already been pushed through the network.
    make_dir_if_not_there(output_dir)

    net = getattr(resnet, model)()
    weights_path = os.path.join(model_root, model + '.pth')
    # Load onto the CPU first so GPU-saved weights also work on CPU-only
    # hosts; the model is moved to the GPU below when one is available.
    net.load_state_dict(torch.load(weights_path, map_location='cpu'))
    my_resnet = myResnet(net)
    if torch.cuda.is_available():
        print('cuda available, use cuda')
        my_resnet.cuda()
    my_resnet.eval()

    with open(split_path, 'r') as split_file:
        image_ids = json.load(split_file)
    N = len(image_ids)

    seed(42) # make reproducible

    all_fc = []
    all_att = []
    for i, im_id in enumerate(image_ids):
        tmp_fc, tmp_att = compute_img_feat(im_id + ".jpg", image_root,
                                           my_resnet, att_size)
        all_fc.append(tmp_fc)
        all_att.append(tmp_att)

        if i % 2000 == 0:
            print('processing %d/%d (%.2f%% done)' % (i, N, i*100.0/N))
            sys.stdout.flush()

    np.savez_compressed(os.path.join(output_dir, 'fc_feature'), feat=all_fc)
    np.savez_compressed(os.path.join(output_dir, 'att_feature'), feat=all_att)

    print('Feature preprocessing done')


In [3]:
# Run the feature extraction with main()'s default arguments.
main()

cuda available, use cuda
processing 0/18501 (0.00% done)
processing 2000/18501 (10.81% done)
processing 4000/18501 (21.62% done)
processing 6000/18501 (32.43% done)
processing 8000/18501 (43.24% done)
processing 10000/18501 (54.05% done)
processing 12000/18501 (64.86% done)
processing 14000/18501 (75.67% done)
processing 16000/18501 (86.48% done)
processing 18000/18501 (97.29% done)
Feature preprocessing done
