In [1]:
import sys

sys.path.insert(0, "..")
import itertools
import os

import ipywidgets
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
from torch import nn

import cocpit
import cocpit.config as config

%load_ext autoreload
%autoreload 2

In [2]:
# Notebook-wide matplotlib styling: extra-large axis/tick text, a fixed
# legend-title size, and a serif font family.
plt_params = {
    rc_key: "xx-large"
    for rc_key in (
        "axes.labelsize",
        "axes.titlesize",
        "xtick.labelsize",
        "ytick.labelsize",
    )
}
plt_params["legend.title_fontsize"] = 12

# The font family is a separate rcParams key, so the order of these two
# statements does not matter.
plt.rcParams.update(plt_params)
plt.rcParams["font.family"] = "serif"

### Check classifications from a specific model and validation dataloader

In [3]:
# Run on the GPU when one is present, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code. Only load checkpoints that come from a trusted location.
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# from a GPU session can also be opened on a CPU-only machine (the previous
# unconditional .cuda() call crashed there).
model = torch.load(
    f"/data/data/saved_models/no_mask/{config.TAG}/e15_bs64_1model(s).pt",
    map_location=device,
).to(device)
val_data = torch.load(
    f"/data/data/saved_val_loaders/no_mask/{config.TAG}/e15_bs64_1model(s).pt"
)

# NOTE(review): shuffle=True makes the review order differ between runs;
# confirm whether that is intended for a validation pass.
val_loader = torch.utils.data.DataLoader(
    val_data,
    batch_size=11,
    shuffle=True,
    # Never request more worker processes than the machine has cores.
    num_workers=min(20, os.cpu_count() or 1),
    # Pinned host memory only helps host-to-GPU copies.
    pin_memory=device.type == "cuda",
)

In [4]:
"""
Use the model and validation dataloader loaded above to check predictions.

Every validation batch is run through the model, but only the incorrect
predictions are kept. Those are later loaded into a GUI so that a user can
decide whether the human label was wrong and the model was right.
"""

model.eval()
criterion = nn.CrossEntropyLoss()  # Loss function (not used by this cell)
top_k_preds = 9  # the top k predictions will be displayed in bar chart

# Each list gets one entry (itself a list) per batch that contained at least
# one wrong prediction; they are flattened into arrays after the loop.
all_labels = []
all_paths = []
all_topk_probs = []
all_topk_classes = []
all_max_preds = []

# Inference only: no_grad avoids building the autograd graph for every batch.
with torch.no_grad():
    for batch_idx, ((imgs, labels, paths), _index) in enumerate(val_loader):
        imgs = imgs.to(config.DEVICE)
        logits = model(imgs)

        # dim=1 picks, for every image in the batch, the class with the
        # highest score.
        _, max_preds = torch.max(logits, dim=1)

        # Plain Python lists are enough for the comparison below, so the
        # labels never need to be copied onto the device.
        max_preds = max_preds.cpu().tolist()
        labels = labels.cpu().tolist()

        wrong_idx = [
            i
            for i, (pred, label) in enumerate(zip(max_preds, labels))
            if pred != label
        ]

        # Nothing to record when every prediction in the batch was right.
        if not wrong_idx:
            continue

        # Top-k class probabilities per image for the bar chart. k is capped
        # at the number of classes so topk cannot raise on small models.
        k = min(top_k_preds, logits.shape[1])
        topk_probs, topk_classes = F.softmax(logits, dim=1).cpu().topk(k)

        # No squeeze(): the batch dimension must survive even when the batch
        # holds a single image, otherwise probs[i] would be one number
        # instead of that image's k-length row.
        probs = topk_probs.tolist()
        classes = topk_classes.tolist()

        # human label and image path
        all_labels.append([labels[i] for i in wrong_idx])
        all_paths.append([paths[i] for i in wrong_idx])

        # model top k predicted probability and classes per image
        all_topk_probs.append([probs[i] for i in wrong_idx])
        all_topk_classes.append([classes[i] for i in wrong_idx])

        # top predicted class from model
        all_max_preds.append([max_preds[i] for i in wrong_idx])

# Flatten the per-batch lists into one array with one row per wrong image.
all_labels = np.asarray(list(itertools.chain.from_iterable(all_labels)))
all_paths = np.asarray(list(itertools.chain.from_iterable(all_paths)))
all_topk_probs = np.asarray(list(itertools.chain.from_iterable(all_topk_probs)))
all_topk_classes = np.asarray(
    list(itertools.chain.from_iterable(all_topk_classes))
)
all_max_preds = np.asarray(list(itertools.chain.from_iterable(all_max_preds)))


print('DONE FINDING INCORRECT PREDICTIONS!')
print(f'There are {len(all_labels)} images to check!')

[4, 10]
[2, 3]
[4, 5]
[1, 4, 9]
[3, 9]
[3, 5]
[5]
[7]
[10]
[2, 5, 6, 8, 9]
[0]
[4, 6]
[6]
[6, 8, 10]
[2]
[0]
[3, 4, 5, 6]
[9]
[5, 6]
[1, 8, 10]
[4, 5]
[7]
[1, 2, 5]
[9]
[4, 5]
[1]
[1, 2, 7]
[5]
[2, 6]
[6]
[9]
[0, 10]
[0, 5, 8]
[0, 10]
[2, 10]
[2]
[0, 2, 3, 5]
[3]
[3, 4, 6]
[10]
[0, 2]
[7]
[6, 7, 8]
[4, 9]
[1]
[9]
[8]
[6, 9]
[2, 7]
[9]
[5]
[5, 10]
[0, 7]
[2, 5, 10]
[2, 5]
[2, 6]
[10]
[9]
[1, 4, 7]
[4, 6, 10]
[10]
[3]
[1, 4, 8]
[8, 10]
[0, 3, 7]
[2, 9]
[4, 6, 8, 9]
[3]
[1]
[1, 6]
[0, 8, 9]
[8]
[2, 10]
[0, 6]
[0, 2, 4, 5, 6, 9]
[5]
[3, 5, 6, 9]
[8, 9]
[6]
[2]


Exception in thread Thread-7:
Traceback (most recent call last):
  File "/usr/local/lib/python3.9/threading.py", line 973, in _bootstrap_inner
    self.run()
  File "/usr/local/lib/python3.9/threading.py", line 910, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.9/site-packages/torch/utils/data/_utils/pin_memory.py", line 25, in _pin_memory_loop
    r = in_queue.get(timeout=MP_STATUS_CHECK_INTERVAL)
  File "/usr/local/lib/python3.9/multiprocessing/queues.py", line 122, in get
    return _ForkingPickler.loads(res)
  File "/usr/local/lib/python3.9/site-packages/torch/multiprocessing/reductions.py", line 282, in rebuild_storage_fd
    fd = df.detach()
  File "/usr/local/lib/python3.9/multiprocessing/resource_sharer.py", line 57, in detach
    with _resource_sharer.get_connection(self._id) as conn:
  File "/usr/local/lib/python3.9/multiprocessing/resource_sharer.py", line 86, in get_connection
    c = Client(address, authkey=process.current_process().au

KeyboardInterrupt: 

In [None]:
def _flatten_batches(batches):
    """Flatten a list of per-batch lists into a single numpy array.

    A value that is already a numpy array is returned unchanged, which makes
    this cell safe to re-run. Flattening an already-flat array again would
    raise on integer labels and split path strings into single characters.
    """
    if isinstance(batches, np.ndarray):
        return batches
    return np.asarray(list(itertools.chain.from_iterable(batches)))


# Recovery step: only does work when the prediction loop was stopped before
# its own flattening ran and the results are still per-batch lists.
all_labels = _flatten_batches(all_labels)
all_paths = _flatten_batches(all_paths)
all_topk_probs = _flatten_batches(all_topk_probs)
all_topk_classes = _flatten_batches(all_topk_classes)
all_max_preds = _flatten_batches(all_max_preds)

In [21]:
# Review GUI for the incorrect predictions collected above.
# The ipywidget buttons are implemented in cocpit/gui.py.
#
# This cell displays a bar chart of the predictions that the model outputs.
# A dropdown menu is available for relabelling: if you think the model got
# the label right, choose a category from the list and the image is moved to
# that category in the training dataset.
# If the human label was correct and the image should stay where it is,
# simply click "Next".
gui = cocpit.gui.GUI(
    all_labels,
    all_paths,
    all_topk_probs,
    all_topk_classes,
    all_max_preds,
)
gui.make_buttons()

# One horizontal row: the output area, the category dropdown and the
# forward button.
display(
    ipywidgets.HBox(
        [
            gui.center,
            gui.menu,
            gui.forward,
        ]
    )
)

HBox(children=(Output(), Dropdown(description='Category:', options=('agg', 'budding', 'bullets', 'columns', 'c…