In [1]:
%pylab
%reload_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import easydict as ED
import os
import time

import torch 
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torchvision import datasets, transforms

import models
# from main import compute_features
from util import AverageMeter

Using matplotlib backend: Qt5Agg
Populating the interactive namespace from numpy and matplotlib


In [2]:
# Notebook-wide settings gathered in a single EasyDict so that later cells can
# read them as attributes (args.arch, args.batch, ...).
args = ED.EasyDict({
    # architecture name looked up in `models`, and the `sobel` flag passed to it
    'arch': 'vgg16',
    'sobel': True,

    # root folder handed to the image dataset
    'data': ('/run/user/1001/gvfs/smb-share:server=samba.dragon.kaust.edu.sa,share='
             'scratch_dragon_intel/parawr/fac100k'),

    # checkpoint file passed to load_weights
    'checkpoint': ('/run/user/1001/gvfs/smb-share:server=samba.dragon.kaust.edu.sa,share='
                   'parawr/Projects/deepCluster/exp/vgg16/LR_0.01_WD_m5_BS_32_K_10000/'
                   'checkpoint.pth.tar'),

    # directory the feature CSV is written into
    'save_dir': './features',

    # DataLoader batch size / worker count, and periodic timing output on/off
    'batch': 32,
    'workers': 12,
    'verbose': True,
})

In [3]:
def get_exp_name(ckpt_name):
    """Return the second-to-last '/'-separated component of ``ckpt_name``.

    For a checkpoint path this is the name of the directory that directly
    contains the file. Raises ``IndexError`` when ``ckpt_name`` contains no
    '/' at all.
    """
    # Only the last two separators matter, so split from the right.
    tail = ckpt_name.rsplit('/', 2)
    return tail[-2]

def load_weights(net, weights):
    """Load the checkpoint stored at ``weights`` into ``net``.

    Args:
        net: module whose ``load_state_dict`` receives
            ``checkpoint['state_dict']``.
        weights: path to a checkpoint file readable by ``torch.load``; the
            loaded object must provide the keys ``'epoch'`` and
            ``'state_dict'``.

    Side effects:
        Stores ``checkpoint['epoch']`` in the notebook-level
        ``args.start_epoch`` and prints progress messages. When ``weights``
        is not an existing file, only a message is printed and ``net`` is
        left untouched.
    """
    if not os.path.isfile(weights):
        print("=> no checkpoint found at '{}'".format(weights))
        return

    print("=> loading checkpoint '{}'".format(weights))
    # NOTE: torch.load unpickles the file, so only point this at checkpoints
    # from a trusted source.
    checkpoint = torch.load(weights)
    args.start_epoch = checkpoint['epoch']
    # Load into the module that was passed in. The previous version used the
    # notebook global `model` here and silently ignored `net`.
    net.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint (epoch {})"
          .format(checkpoint['epoch']))
        
def compute_features(dataloader, model, N):
    """Run ``model`` over every batch of ``dataloader`` and stack the outputs.

    Args:
        dataloader: sized iterable yielding ``(input_tensor, label)`` pairs;
            the labels are ignored.
        model: torch module. It is switched to eval mode and called on each
            batch under ``torch.no_grad()``; its output is indexed as a 2-D
            (rows x features) array.
        N: number of rows to allocate for the result (the callers pass
            ``len(dataset)``).

    Returns:
        A float32 numpy array of shape ``(N, feature_dim)`` whose rows are
        filled in the order the batches arrive. If ``dataloader`` yields no
        batches, an array of shape ``(N, 0)`` is returned.

    Notes:
        Reads the notebook-level ``args.verbose`` to decide whether to print
        timing information every 200 batches.
    """
    batch_time = AverageMeter()
    num_batches = len(dataloader)

    # Send inputs to the device the model's parameters live on. A model
    # without parameters falls back to the default CUDA device, which is what
    # the previous hard-coded `.cuda()` call did.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')

    model.eval()
    features = None
    offset = 0  # index of the next free row in `features`
    end = time.time()
    # discard the label information in the dataloader
    for i, (input_tensor, _) in enumerate(dataloader):
        # Progress is counted in batches, so the total is the batch count
        # (not the sample count N).
        print('\rBatch Number {}/{}'.format(i, num_batches), end='')
        with torch.no_grad():
            aux = model(input_tensor.to(device)).detach().cpu().numpy()

        if features is None:
            # Allocate float32 directly; the feature width is only known once
            # the first batch has been processed.
            features = np.zeros((N, aux.shape[1]), dtype='float32')

        # Place rows by a running offset taken from the batches actually
        # received, so the result does not depend on the global `args.batch`
        # matching the loader's batch size (and the short final batch needs
        # no special case).
        rows = aux.shape[0]
        features[offset:offset + rows] = aux
        offset += rows

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if args.verbose and (i % 200) == 0:
            print('{0} / {1}\t'
                  'Time: {batch_time.val:.3f} ({batch_time.avg:.3f})'
                  .format(i, num_batches, batch_time=batch_time))

    if features is None:
        # Empty loader: no batch ever revealed the feature width.
        return np.zeros((N, 0), dtype='float32')
    return features

In [5]:
%pwd

'/home/parawr/Projects/deepCluster'

In [4]:
# Preprocessing: resize to 256, centre-crop to 224, convert to a tensor and
# normalise each channel with the mean/std given below.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
tra = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
]

# Build the image-folder dataset from args.data and report how long it took.
end = time.time()
dataset = datasets.ImageFolder(
    args.data,
    transform=transforms.Compose(tra),
)
print('Load dataset: {0:.2f} s'.format(time.time() - end))

# Batch size and worker count come from the config cell; no shuffle argument
# is passed to the loader.
loader_options = dict(
    batch_size=args.batch,
    num_workers=args.workers,
    pin_memory=True,
)
dataloader = torch.utils.data.DataLoader(dataset, **loader_options)

FileNotFoundError: [Errno 2] No such file or directory: '/run/user/1001/gvfs/smb-share:server=samba.dragon.kaust.edu.sa,share=scratch_dragon_intel/parawr/fac100k'

In [None]:
# Look up the constructor named by args.arch in the local `models` module.
build_model = vars(models)[args.arch]
model = build_model(sobel=args.sobel)

# Remember the top layer's input width, then drop the layer itself.
fd = int(model.top_layer.weight.shape[1])
model.top_layer = None

# Wrap the feature extractor in DataParallel and move the model to the GPU.
model.features = torch.nn.DataParallel(model.features)
model = model.cuda()
cudnn.benchmark = True

# NOTE(review): top_layer is None at this point; if the checkpoint still
# stores top_layer.* entries, load_state_dict may reject them - confirm
# against the checkpoint contents.
load_weights(model, args.checkpoint)

In [None]:
model.top_layer = None

# Keep every child of the classifier except the last one.
# Caution: this rebuilds the classifier from its *current* children, so
# running the cell again strips one more layer each time.
kept_layers = list(model.classifier.children())[:-1]
model.classifier = nn.Sequential(*kept_layers)

# Extract one feature row per sample for the whole dataset.
num_samples = len(dataset)
features = compute_features(dataloader, model, num_samples)

In [None]:
# Show how many samples the dataset holds; this is the row count that is
# passed to compute_features.
len(dataset)

In [None]:
# Wrap the feature matrix in a DataFrame (one row per sample) and save it.
df = pd.DataFrame(features)
# to_csv does not create missing directories, so make sure the target
# directory exists before writing.
os.makedirs(args.save_dir, exist_ok=True)
df.to_csv(os.path.join(args.save_dir, 'feat100k.csv'))

In [None]:
# Preview the first rows. `head` must be called; a bare `df.head` only
# displays the bound method.
df.head()