In [None]:
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, transforms
import os
import matplotlib.pyplot as plt
import numpy as np
import json
import pandas as pd
import math
from PIL import Image

In [None]:
# import pretrained models
# The three ImageNet-1k reference networks come from torchvision. They are not
# moved to `device` in this cell, so they run on the CPU.
alexnet = torchvision.models.alexnet(pretrained = True)
resnet = torchvision.models.resnet50(pretrained = True)
vgg = torchvision.models.vgg16(pretrained = True)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 'alexnet_9l.pth' is loaded as a complete model object (it is used directly
# with .to()/.eval() below). map_location remaps the stored tensors onto
# `device`, so a checkpoint that was saved on a GPU also loads on a CPU-only
# machine.
# NOTE(review): torch.load unpickles arbitrary Python objects - only load
# checkpoints from a trusted source. Recent PyTorch releases may additionally
# require weights_only=False to load a fully pickled module - confirm against
# the installed version.
alexnet_9l = torch.load('alexnet_9l.pth', map_location = device)
alexnet_9l.to(device)
alexnet_9l.eval()

In [None]:
# Taken from https://github.com/MadryLab/backgrounds_challenge/blob/46d224bb02a296681eddbae44a49da9abb5ba038/tools/model_utils.py#L40

# ResNet-50 backbone with a 9-way classification head; its weights are filled
# from the checkpoint below.
resnet_9l = torchvision.models.resnet50(pretrained = False)
resnet_9l.fc = nn.Linear(2048, 9)
# resnet_9l is never moved off the CPU in this cell, so load the tensors onto
# the CPU; this also lets a checkpoint saved on a GPU load on a CPU-only machine.
checkpoint = torch.load('in9l_resnet50.pt', map_location = 'cpu')

# this is there to ensure only the model part of the checkpoint is used and other parameters are ignored
state_dict_path = 'model'
if not ('model' in checkpoint):
    state_dict_path = 'state_dict'


def _strip_prefix(key, prefix):
    """Return `key` without a leading `prefix`; keys lacking the prefix are returned unchanged."""
    return key[len(prefix):] if key.startswith(prefix) else key


# Drop the 'module.' prefix, but only from keys that actually carry it
# (slicing unconditionally would cut 7 characters off unprefixed keys).
sd = {_strip_prefix(k, 'module.'): v for k, v in checkpoint[state_dict_path].items()}

# To deal with some compatibility issues: keep only entries the model knows about
model_dict = resnet_9l.state_dict()
matched = {k: v for k, v in sd.items() if k in model_dict}

# Fallback for checkpoints that nest the network inside a wrapper object.
# NOTE(review): assumes such wrappers store the network under a 'model.' prefix -
# confirm against the checkpoint. Direct matches above always take precedence.
for k, v in sd.items():
    inner_key = _strip_prefix(k, 'model.')
    if inner_key in model_dict and inner_key not in matched:
        matched[inner_key] = v
sd = matched

# Without this check a key mismatch would silently leave the model at its
# random initialisation and every later prediction would be meaningless.
if not sd:
    raise RuntimeError("no checkpoint entry matches a resnet50 parameter name; "
                       "in9l_resnet50.pt was not loaded")
missing_keys = [k for k in model_dict if k not in sd]
if missing_keys:
    print('{} model entries were not found in the checkpoint and keep their initial values, e.g. {}'
          .format(len(missing_keys), missing_keys[:5]))

model_dict.update(sd)
resnet_9l.load_state_dict(model_dict)

resnet_9l.eval()
#############################################################################

In [None]:
# import test data

# Per-channel mean/std normalisation applied after the image is converted to a tensor.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Every image is resized to 224x224, converted to a tensor and normalised.
transform_ = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize
])

folder_dir = 'imagenet/only_fg/val_1'
data_test =  datasets.ImageFolder(root = folder_dir, transform = transform_,
                                       target_transform = None)

# show the test images in a 20-column grid; each title is the dataset index

n_cols = 20
n_rows = math.ceil(len(data_test) / n_cols)
fig = plt.figure(figsize = (12, n_rows))
for i in range(len(data_test)):
    # (C, H, W) -> (H, W, C) as expected by imshow
    img = data_test[i][0].permute(1, 2, 0)
    # rescale to [0, 1] for display; the clamp keeps a uniform image from dividing by zero
    img_norm = (img - img.min()) / (img.max() - img.min()).clamp(min = 1e-12)
    plt.subplot(n_rows, n_cols, i+1)
    plt.imshow(img_norm)
    plt.title(i)
    plt.axis('off')
# tight_layout only has an effect once the axes exist, so call it after drawing
fig.tight_layout()
plt.show()

In [None]:
# Second test set, loaded with the same preprocessing as `data_test`.
folder_dir = 'imagenet/test_0924'
data_test_1 =  datasets.ImageFolder(root = folder_dir, transform = transform_,
                                       target_transform = None)

# Show the images of *this* dataset: the grid is sized from data_test_1 and the
# images are read from data_test_1 (not from the previously loaded data_test).
n_cols = 20
n_rows = math.ceil(len(data_test_1) / n_cols)
fig = plt.figure(figsize = (12, n_rows))
for i in range(len(data_test_1)):
    # (C, H, W) -> (H, W, C) as expected by imshow
    img = data_test_1[i][0].permute(1, 2, 0)
    # rescale to [0, 1] for display; the clamp keeps a uniform image from dividing by zero
    img_norm = (img - img.min()) / (img.max() - img.min()).clamp(min = 1e-12)
    plt.subplot(n_rows, n_cols, i+1)
    plt.imshow(img_norm)
    plt.title(i)
    plt.axis('off')
# tight_layout only has an effect once the axes exist, so call it after drawing
fig.tight_layout()
plt.show()

# Predicted class index of resnet_9l for every image, in dataset order.
predictions = []
# inference only: no autograd graph is needed
with torch.no_grad():
    for data in data_test_1:
        logits = resnet_9l(data[0].unsqueeze(0))
        pred = logits.max(dim=1)[1].item()
        predictions.append(pred)
        print(pred)

In [None]:
def cos_sim(vector_a, vector_b):
    """Return the cosine similarity of two vectors, rescaled from [-1, 1] to [0, 1].

    Args:
        vector_a: array-like of numbers. It is flattened before use, so shapes
            such as (n,), (1, n) and (n, 1) are all accepted.
        vector_b: array-like with the same number of elements as `vector_a`.

    Returns:
        float: ``0.5 + 0.5 * cos(a, b)``, i.e. 1.0 for vectors pointing in the
        same direction, 0.5 for orthogonal vectors and 0.0 for opposite ones.
        ``nan`` is returned when either vector has zero norm, because the
        cosine is undefined in that case.

    Raises:
        ValueError: if the two inputs do not contain the same number of elements.
    """
    # Plain ndarrays instead of np.mat (deprecated and removed in NumPy 2.0).
    flat_a = np.asarray(vector_a, dtype = float).ravel()
    flat_b = np.asarray(vector_b, dtype = float).ravel()

    num = float(np.dot(flat_a, flat_b))
    denom = float(np.linalg.norm(flat_a) * np.linalg.norm(flat_b))
    if denom == 0.0:
        return float('nan')

    # Clip so floating point rounding cannot push the result outside [0, 1].
    cos = min(1.0, max(-1.0, num / denom))
    sim = 0.5 + 0.5 * cos
    return sim

In [None]:
# make prediction

# Readable label of every test image, per network.
pred_alexnet = []
pred_resnet = []
pred_vgg = []

# Similarity between the logits of each odd-indexed image and the image right
# before it, per network.
diff_alexnet = []
diff_resnet = []
diff_vgg = []

logits_alexnet = None
logits_resnet = None
logits_vgg = None

alexnet.eval()
resnet.eval()
vgg.eval()

# class index -> class name, taken from the second entry of each JSON value
with open("imagenet_class_index.json") as f:
    imagenet_classes = {int(i):x[1] for i,x in json.load(f).items()}


def _readable_label(pred):
    """Map a predicted class index to its super class name.

    If the index has no entry in `super_class`, the class name from
    `imagenet_classes` is returned instead, marked with a trailing '*'.

    NOTE(review): `super_class` is not defined in the cells shown here;
    presumably it is a pandas object indexed by class id with a `sup_class`
    field - confirm. Only KeyError (index not present) is treated as
    "not a super class", so a missing or malformed `super_class` is no longer
    silently hidden.
    """
    try:
        return super_class.loc[pred].sup_class
    except KeyError:
        return str(imagenet_classes[pred]) + '*'


# network name -> (model, list collecting its labels, list collecting its similarities)
_runs = {
    'alexnet': (alexnet, pred_alexnet, diff_alexnet),
    'resnet': (resnet, pred_resnet, diff_resnet),
    'vgg': (vgg, pred_vgg, diff_vgg),
}
# logits of the previously processed image, per network
_last_logits = {name: None for name in _runs}

# use super_class
# inference only: no autograd graph is needed
with torch.no_grad():
    for i, data in enumerate(data_test):
        img = data[0].unsqueeze(0)

        for name, (model, preds, diffs) in _runs.items():
            logits = model(img)
            # if the predictions are not included in the superclasses, put '*' near them
            preds.append(_readable_label(logits.max(dim=1)[1].item()))

            # images are compared in consecutive pairs (0, 1), (2, 3), ...
            if (i%2) != 0:
                diffs.append(cos_sim(logits.detach().numpy(),
                                     _last_logits[name].detach().numpy()))
            _last_logits[name] = logits

# keep the logits of the last image under the names used before the refactoring
logits_alexnet = _last_logits['alexnet']
logits_resnet = _last_logits['resnet']
logits_vgg = _last_logits['vgg']

In [None]:
# make prediction with the 9-class AlexNet (alexnet_9l)
#
# This cell overwrites pred_alexnet / diff_alexnet / logits_alexnet:
#   pred_alexnet   - predicted class index (int) per test image
#   logits_alexnet - list with one logits tensor per test image (kept on `device`)
#   diff_alexnet   - reset to an empty list; nothing is appended here
# The commented-out copies of the ImageNet-1k loops (alexnet / resnet / vgg)
# were removed; the active versions live in the previous prediction cell.

pred_alexnet = []
diff_alexnet = []
logits_alexnet = []

alexnet.eval()
# the model that is actually evaluated below
alexnet_9l.eval()

# inference only: without no_grad every stored logits tensor would keep its
# autograd graph alive
with torch.no_grad():
    for data in data_test:
        img = data[0].unsqueeze(0).to(device)

        logits = alexnet_9l(img)
        logits_alexnet.append(logits)
        pred = logits.max(dim=1)[1].item()
        pred_alexnet.append(pred)

In [None]:
# record errors made

n_subclass = len(data_test.classes)
subclasses = data_test.classes[:n_subclass]

# folder name -> list of (dataset index, predicted class index) for wrong predictions
err_alexnet = {folder_name: [] for folder_name in subclasses}

# Every prediction is compared against this fixed class index, not against the
# folder label of the image.
# NOTE(review): presumably all test images belong to class 0 of the 9-class
# label set and the folders only distinguish image variants - confirm.
TRUE_CLASS = 0

for i, pred in enumerate(pred_alexnet):
    if pred != TRUE_CLASS:
        # `targets` holds the folder index of every sample, so the label is read
        # without loading and transforming the image again
        folder_name = data_test.classes[data_test.targets[i]]
        err_alexnet[folder_name].append((i, pred))

print('errors made by alexnet: ' + '\n')
print(err_alexnet)

In [None]:
# plot the errors made by each model, the titles represent the false predictions


def _plot_error_grid(model_name, errors):
    """Draw the misclassified images of one model in a 10-column grid.

    Args:
        model_name: text used as the figure title.
        errors: list of (dataset index, predicted label) tuples; each image is
            read from `data_test` and titled with its index and the prediction.
    """
    if not errors:
        # nothing to draw; also avoids creating an empty, zero-height figure
        print('{}: no errors'.format(model_name))
        return

    n_rows = math.ceil(len(errors)/10)
    plt.figure(figsize = (14, math.ceil(len(errors)/3.5)))
    plt.suptitle(model_name, y = 1.01)
    plt.subplots_adjust(wspace = 0.1, hspace = 0.1)
    for i, (num, false_pred) in enumerate(errors):
        # (C, H, W) -> (H, W, C) as expected by imshow
        img = data_test[num][0].permute(1, 2, 0)
        # rescale to [0, 1] for display
        img_norm = (img - img.min()) / (img.max() - img.min())
        plt.subplot(n_rows, 10, i+1)
        plt.imshow(img_norm)
        plt.title(str(num) + '\n' + str(false_pred), fontsize = 11)
        plt.axis('off')
    plt.show()


# err_resnet / err_vgg only exist when the error-recording cell computes them
# (their creation is currently commented out there), so they are plotted only
# if present instead of failing with a NameError.
error_sets = [('Alexnet', err_alexnet)]
for model_name, var_name in (('Resnet', 'err_resnet'), ('VGG', 'err_vgg')):
    if var_name in globals():
        error_sets.append((model_name, globals()[var_name]))

for class_name in err_alexnet.keys():

    print('\n' + '\033[1m' + 'Errors made on {} by: '.format(class_name))
    for model_name, errors_by_class in error_sets:
        _plot_error_grid(model_name, errors_by_class.get(class_name, []))

#### Quantify the influence of background

In [None]:
# Split the logits into the first and the second half of the test set and
# compare them element by element (i-th of the first half vs. i-th of the second).
# NOTE(review): presumably "nb" = images without background and "b" = images
# with background, stored in that order - confirm against the dataset layout.
half = len(logits_alexnet) // 2
logits_alexnet_nb = logits_alexnet[0:half]
logits_alexnet_b = logits_alexnet[half:]

cos_similarities = []
for i in range(len(logits_alexnet_nb)):
    cos_similarities.append(cos_sim(logits_alexnet_nb[i].detach().to('cpu').numpy(),
                                    logits_alexnet_b[i].detach().to('cpu').numpy()))

# Count the similarities per 0.05-wide bin between 0.6 and 1. Each bin is
# [low, high) except the last one, which also includes exactly 1.0; values
# below 0.6 are not counted. (The hand-written filters compared against `8`
# instead of `0.8`, so the 0.75-0.8 bar also counted every value >= 0.8.)
bin_edges = [0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0]
height = tuple(int(count) for count in np.histogram(cos_similarities, bins = bin_edges)[0])

x_label = ['0.6-0.65', '0.65-0.7', '0.7-0.75', '0.75-0.8', '0.8-0.85', '0.85-0.9', '0.9-0.95', '0.95-1']
plt.figure(figsize = (10,6))
plt.bar(x_label, height)
plt.title('Cosine Similarities')
plt.ylabel('number of images')
plt.show()

##### The remaining part is not related to the current task

Testing the performance on stylized images

In [None]:
# Stylized stimuli: the whole stimuli directory is loaded as one image-folder
# dataset with the same preprocessing as the test data. `sti_name` is only
# referenced by the commented-out path suffix below.
sti_name = 'filled-silhouettes'
# class_name = 'bear'

data_dir = 'texture-vs-shape-master/stimuli/' # + sti_name  + '/' + class_name + '/'

data_sti = datasets.ImageFolder(
    root = data_dir,
    transform = transform_,
    target_transform = None,
)

In [None]:
# Show the stylized images, each titled with the class name predicted by the
# ImageNet-pretrained ResNet-50.
resnet.eval()

# At most this many images are drawn (the loop used to stop after index 600).
MAX_SHOWN = 601
n_shown = min(len(data_sti), MAX_SHOWN)

# Size the grid for the images that are actually drawn rather than for the
# whole dataset, so a large dataset does not produce a mostly empty figure.
n_cols = 10
n_rows = math.ceil(n_shown / n_cols)
plt.figure(figsize = (15, math.ceil(n_shown / 5)))

# inference only: no autograd graph is needed
with torch.no_grad():
    for i in range(n_shown):
        img = data_sti[i][0]
        pred = resnet(img.unsqueeze(0))
        pred_class = str(imagenet_classes[pred.max(dim=1)[1].item()])
        # rescale to [0, 1] for display
        img_norm = (img - img.min()) / (img.max() - img.min())
        plt.subplot(n_rows, n_cols, i+1)
        # (C, H, W) -> (H, W, C) as expected by imshow
        plt.imshow(img_norm.permute(1, 2, 0))
        plt.title(pred_class, fontsize = 11)
        plt.axis('off')
plt.show()

In [None]:
# deleted codes
# The string below only archives an earlier single-image experiment; nothing in
# it is executed. The split "Resi" / "\ze" fragment was repaired to "Resize"
# (the stray backslash formed an invalid escape sequence inside the literal).

"""
preprocess = transforms.Compose([
   transforms.Resize((224, 224)),
   transforms.ToTensor(),
])

class Normalize(nn.Module):
    def __init__(self, mean, std):
        super(Normalize, self).__init__()
        self.mean = torch.Tensor(mean)
        self.std = torch.Tensor(std)
    def forward(self, x):
        return (x - self.mean.type_as(x)[None,:,None,None]) / self.std.type_as(x)[None,:,None,None]

norm = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

data_dir = 'ILSVRC2012_img_val/ILSVRC2012_val_00000008.JPEG'
test_img = Image.open(data_dir)
test_img = preprocess(test_img)[None,:,:,:]
test_img_norm = norm(test_img)

plt.imshow(test_img[0].numpy().transpose(1,2,0))

alexnet.eval()

pred = alexnet(test_img_norm)
_, predicted = torch.max(pred, 1)

with open("imagenet_class_index.json") as f:
    imagenet_classes = {int(i):x[1] for i,x in json.load(f).items()}
print('predicted label:{} '.format(int(predicted)) + str(imagenet_classes[pred.max(dim=1)[1].item()]))
print('true label:{} '.format(int(labels[0][7])) + str(imagenet_classes[int(labels[0][7])]))
"""