In [13]:
import torchvision 
import numpy as np
import torch
import torch.autograd as autograd
from torch.autograd import Variable, Function
from torch.nn.parameter import Parameter
import torch.nn as nn
import pylab as pl
import time
import math
import pickle
from IPython import display
import torch.nn.functional as F
from copy import deepcopy
import cifarResnet
from torchsummary import summary
from matplotlib import pyplot as plt
from scipy.misc import bytescale

torch.cuda.set_device(3) # Set gpu number here

In [14]:
train_set = torchvision.datasets.CIFAR100(root='/data/xay2/cifar100', train=True, download=True)
test_set = torchvision.datasets.CIFAR100(root='/data/xay2/cifar100', train=False, download=True)
with open('/data/xay2/cifar100/cifar-100-python/train', 'rb') as fo:
    d = pickle.load(fo, encoding='bytes')
    train_data = d[b'data'].reshape(-1,3,32,32)
    train_labels = np.array(d[b'fine_labels'])
with open('/data/xay2/cifar100/cifar-100-python/test', 'rb') as fo:
    d = pickle.load(fo, encoding='bytes')
    testdata = d[b'data'].reshape(-1,3,32,32)
    testlabels = np.array(d[b'fine_labels'])
    
train_size = 40000
val_size = 10000
traindata = train_data[:train_size]
trainlabels = train_labels[:train_size]
valdata = train_data[train_size:train_size+val_size]
vallabels = train_labels[train_size:train_size + val_size]

traindata = torch.Tensor(traindata.astype(np.float64))
trainlabels = torch.Tensor(trainlabels.astype(np.float64)).long()
valdata = torch.Tensor(valdata.astype(np.float64))
vallabels = torch.Tensor(vallabels.astype(np.float64)).long()
testdata = torch.Tensor(testdata.astype(np.float64))
testlabels = torch.Tensor(testlabels.astype(np.float64)).long()


standardize = torchvision.transforms.Normalize(mean=[125.3, 123, 113.9],std=[63, 62.1, 66.7])
traindata = torch.cat([standardize(traindata[i]).view(1,3,32,32) for i in range(len(traindata))],dim=0).float()
valdata = torch.cat([standardize(valdata[i]).view(1,3,32,32) for i in range(len(valdata))],dim=0).float()
testdata = torch.cat([standardize(testdata[i]).view(1,3,32,32) for i in range(len(testdata))],dim=0).float()

Files already downloaded and verified
Files already downloaded and verified


In [15]:
# Define our helper functions

def alter(t):
    # flip and crop
    if np.random.random(1) > .5:
        inv_idx = torch.arange(t.size(2)-1,-1,-1).long()
        result = t[:,:,inv_idx]
    else:
        result = t
    result = F.pad(result.view(1,3,32,32),(4,4,4,4)).data
    x = np.random.randint(0,high=9)
    y = np.random.randint(0,high=9)
    return result[0,:,x:x+32,y:y+32].contiguous()

def batchmaker(theset, thelabels, shot=100, alterful=False):
    batch = np.random.choice(len(theset),shot,replace=False)
    data = (torch.cat([alter(theset[i]).view(1,3,32,32) for i in batch],dim=0).float()
            if alterful else
            torch.cat([theset[i].view(1,3,32,32) for i in batch],dim=0).float())
    labels = torch.LongTensor(np.array([thelabels[i] for i in batch]))
    
    return(Variable(data).cuda(),
           Variable(labels).cuda()
          )

criterion = nn.CrossEntropyLoss().cuda()

def evaluate(model, criterion, testing_data, testing_labels, shot=1000):
    support, targs = batchmaker(testing_data, testing_labels, shot=shot)
    c = 200
    assert shot%c==0
    batches = int(shot//c)
    support = support.view(batches, c, 3, 32, 32)
    targs = targs.view(batches, c)
    acc = 0
    loss = 0
    with torch.no_grad():
        for i in range(batches):
            preds = model(support[i,:,:,:,:])
            loss += criterion(preds, targs[i,:]).data[0]
            _,bins = torch.max(preds, 1)
            acc += torch.sum(torch.eq(bins,targs[i,:])).data[0]
    acc = float(acc)/shot
    loss = float(loss)/batches
    return loss, acc

In [16]:
model = cifarResnet.ResNet(9,16).cuda()
i = model.fc.in_features
model.fc = nn.Linear(i,100).cuda()
#model after fine tuning
checkpoint = torch.load('/data/xay2/scripts/pretrained_resnet/fine_tune.pth')
model.load_state_dict(checkpoint)

for param in model.parameters():
    if param.requires_grad == False:
        print(param)


In [19]:
batch_data, batch_targets = batchmaker(traindata, trainlabels, alterful=False, shot=50)
ensemble = list(range(3))

for b in ensemble:
    preds = model(batch_data, True)
    _,bins = torch.max(preds, 1)
    print(bins)
    kl = torch.nn.functional.kl_div(bins,batch_targets,True).cuda()

print(batch_targets)

blocks to drop [4, 5, 1] [3, 2, 5] [2, 7, 1]
skip connections 1
skip connections 4
skip connections 5
skip connections 2
skip connections 3
skip connections 5
skip connections 1
skip connections 2
skip connections 7
tensor([ 88,  59,   0,  93,  73,  68,  49,  50,  50,  71,   1,  71,
         70,  16,  83,  99,  16,  79,  41,  50,  27,  50,  57,  57,
          8,  63,  30,  91,  89,  96,   0,   3,  22,  19,  93,  31,
         46,  55,  91,  29,  97,  28,  33,  69,  14,  39,  45,  32,
         43,  19], device='cuda:3')


RuntimeError: kl_div_forward is not implemented for type torch.cuda.LongTensor