In [7]:
import sys

sys.path.insert(0, "..")
import itertools
import os

import cocpit.config as config
import cocpit

import ipywidgets
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
from torch import nn

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
# Figure styling for this notebook: serif font, extra-large axis/title/tick
# text, and a 12-pt legend title.
plt.rcParams["font.family"] = "serif"

# All four text-size settings share the same value, so build them in one go.
plt_params = dict.fromkeys(
    ("axes.labelsize", "axes.titlesize", "xtick.labelsize", "ytick.labelsize"),
    "xx-large",
)
plt_params["legend.title_fontsize"] = 12
plt.rcParams.update(plt_params)

### check classifications from specific model and validation dataloader

In [9]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE: torch.load unpickles the file, which can execute arbitrary code --
# only load checkpoints that come from a trusted source.
# map_location remaps tensors saved on a GPU so the checkpoint also loads on a
# CPU-only machine; .to(device) then places the model on the selected device
# (previously a hard-coded .cuda() ignored `device` and failed without a GPU).
model = torch.load(
    f"/data/data/saved_models/no_mask/{config.TAG}/e15_bs64_1model(s).pt",
    map_location=device,
).to(device)

# Alternative input: the validation split saved alongside the model.
#val_data = torch.load(
#    f"/data/data/saved_val_loaders/no_mask/{config.TAG}/e15_bs64_1model(s).pt"
#)

#val_loader = torch.utils.data.DataLoader(
#    val_data, batch_size=11, shuffle=True, num_workers=20, pin_memory=True
#)

# Dataset returned by cocpit.data_loaders.get_data(), served in batches of 100.
data = cocpit.data_loaders.get_data()
dataloader = torch.utils.data.DataLoader(
    data,
    batch_size=100,
    num_workers=20,
    # Pinned host memory only helps host-to-GPU copies; skip it on CPU.
    pin_memory=device.type == "cuda",
)


In [10]:
'''
use the model and dataloader from the cell above to check predictions
loops through every batch in the dataloader but only saves incorrect predictions
the incorrect predictions are loaded into a gui so that a user can decide
whether the label was wrong/model was right
'''

model.eval()
criterion = nn.CrossEntropyLoss()  # Loss function (not used in this cell)
top_k_preds = 9  # the top k predictions will be displayed in bar chart

# Each list receives one sub-list per batch that contained at least one
# incorrect prediction; they are flattened into arrays after the loop.
all_labels = []
all_paths = []
all_topk_probs = []
all_topk_classes = []
all_max_preds = []

try:
    # Inference only: no_grad avoids building an autograd graph for every
    # batch, which would otherwise hold on to memory for no benefit.
    with torch.no_grad():
        # The second element of each batch is not needed here.
        for (imgs, labels, paths), _index in dataloader:
            imgs = imgs.to(config.DEVICE)
            labels = labels.to(config.DEVICE)

            logits = model(imgs)
            # dimension 1 because taking the prediction
            # with the highest probability
            # from all classes across each index in the batch
            _, max_preds = torch.max(logits, dim=1)

            # convert back to lists from being on gpus
            max_preds = max_preds.cpu().tolist()
            labels = labels.cpu().tolist()

            # positions within this batch where model and human label disagree
            wrong_idx = [
                i
                for i, (pred, label) in enumerate(zip(max_preds, labels))
                if pred != label
            ]

            # nothing to record when every prediction in the batch is correct
            if not wrong_idx:
                continue

            # get top k predictions for each index in the batch for bar chart
            predictions = F.softmax(logits, dim=1)
            # never ask for more entries than there are classes
            k = min(top_k_preds, predictions.shape[1])
            # No squeeze(): it would drop the batch axis for a batch holding a
            # single image (or the k axis when k == 1), so probs[i] would no
            # longer be the per-image list of k values.
            probs, classes = (t.tolist() for t in predictions.cpu().topk(k))

            # human label and image path
            all_labels.append([labels[i] for i in wrong_idx])
            all_paths.append([paths[i] for i in wrong_idx])

            # model top k predicted probability and classes per image
            all_topk_probs.append([probs[i] for i in wrong_idx])
            all_topk_classes.append([classes[i] for i in wrong_idx])

            # top predicted class from model
            all_max_preds.append([max_preds[i] for i in wrong_idx])

except FileNotFoundError:
    # Best effort: keep whatever was collected before the missing file.
    print("There are files in the dataloader that have already moved and cannot be found.")
    print("This is likely due to running an old model that has not captured the updated file movement.")
    print("Try rerunning the model to update the validation dataloaders.  Stopping prematurely.")

# flatten the per-batch sub-lists into one array with an entry per image
all_labels = np.asarray(list(itertools.chain(*all_labels)))
all_paths = np.asarray(list(itertools.chain(*all_paths)))
all_topk_probs = np.asarray(list(itertools.chain(*all_topk_probs)))
all_topk_classes = np.asarray(list(itertools.chain(*all_topk_classes)))
all_max_preds = np.asarray(list(itertools.chain(*all_max_preds)))


print('DONE FINDING INCORRECT PREDICTIONS!')
print(f'There are {len(all_labels)} images to check!')

DONE FINDING INCORRECT PREDICTIONS!
There are 11967 images to check!


In [5]:
'''if you stopped the previous cell early because there are
so many wrong predictions run this to capture labels up until you stopped waiting

safe to run more than once: results that are already numpy arrays are left untouched'''


def _flatten_batches(batches, n_batches):
    """Flatten the first `n_batches` per-batch sub-lists into one numpy array.

    An input that is already a numpy array is returned unchanged. Flattening
    it again would fail on integer labels and would split path strings into
    single characters.
    """
    if isinstance(batches, np.ndarray):
        return batches
    return np.asarray(list(itertools.chain.from_iterable(batches[:n_batches])))


_results = (all_labels, all_paths, all_topk_probs, all_topk_classes, all_max_preds)

# The collection loop appends to the five lists one after another, so an
# interrupt can leave the earlier lists one batch longer than the later ones.
# Keep only the batches present in every list so the arrays stay aligned.
_pending = [r for r in _results if not isinstance(r, np.ndarray)]
_n_complete = min((len(r) for r in _pending), default=0)

all_labels, all_paths, all_topk_probs, all_topk_classes, all_max_preds = (
    _flatten_batches(r, _n_complete) for r in _results
)

In [11]:
'''
the ipywidgets button code used here lives in cocpit/gui.py

this cell shows a bar chart of the predictions that the model outputs
use the dropdown menu to move the image if you think the model got the label right:
choosing an option moves the image to that category in the training dataset
if the human label was correct and the image should stay put, simply click "Next"
'''
gui = cocpit.gui.GUI(
    all_labels,
    all_paths,
    all_topk_probs,
    all_topk_classes,
    all_max_preds,
)
gui.make_buttons()

# lay out the output area, category dropdown, and "Next" button side by side
display(ipywidgets.HBox(children=[gui.center, gui.menu, gui.forward]))

HBox(children=(Output(), Dropdown(description='Category:', options=('agg', 'budding', 'bullets', 'columns', 'c…

In [61]:
# Number of entries in all_labels, i.e. how many incorrectly predicted images are queued for review.
len(all_labels)

1340

In [9]:
# Release unoccupied GPU memory held by PyTorch's caching allocator.
torch.cuda.empty_cache()