Each image stack contains 44 images, of which only 3 are considered to be in "acceptable" focus. This means the "acceptable" class has a much smaller population than the out-of-focus classes. In previous rounds of training I did not correct for this, which allowed the difference in population size to bias the training. In the 3-category model (below, acceptable, and above) this had a strong effect, causing the model to miss every single acceptable image despite its apparently high accuracy. The bias is less marked in the 5- and 7-category models, since the out-of-focus images are split across more classes, but each out-of-focus class still contains more images than the acceptable class. In this notebook I train the model on a set of images in which the bias has been corrected, to see whether this improves both overall accuracy and accuracy in identifying acceptable images as acceptable.

In [1]:
# Quick check to see how many images there are per class

def count_images(file_path):
    """Count the files stored in each class folder under ``file_path``.

    Every non-hidden sub-directory of ``file_path`` is treated as one class.
    Hidden entries (names starting with ``.``, e.g. ``.DS_Store``) are ignored
    both at the top level and inside the class folders, and loose files that
    sit next to the class folders are skipped instead of raising an error.

    Files are counted regardless of extension; no image-type filter is applied.

    Args:
        file_path: Path (str or path-like) to the directory holding one
            sub-directory per class.

    Returns:
        A tuple ``(image_counter, class_names)`` of two parallel lists:
        the number of files in each class folder, and the folder names in
        sorted order.
    """
    import os
    from pathlib import Path

    image_counter = []
    class_names = []

    for class_name in sorted(os.listdir(file_path)):
        class_dir = Path(file_path) / class_name

        # Only visible directories are classes; this drops .DS_Store and any
        # stray file that would otherwise make os.listdir() fail below
        if class_name.startswith('.') or not class_dir.is_dir():
            continue

        class_names.append(class_name)

        # Count regular, non-hidden files so a .DS_Store inside the class
        # folder does not inflate the image count
        image_counter.append(sum(
            1 for entry in class_dir.iterdir()
            if entry.is_file() and not entry.name.startswith('.')
        ))

    return image_counter, class_names

In [2]:
# Tally the training images per class folder, then show the class names
# followed by the matching counts (one list per line)
train_path = '/Users/zplab/Desktop/VeraPythonScripts/vera_autofocus/microscope_images_5cat/train'
train_counts, class_names = count_images(train_path)
print(class_names, train_counts, sep='\n')

['0', '1', '2', '3', '4']
[298, 95, 57, 95, 310]


In [3]:
# Tally the test images per class folder, then show the class names
# followed by the matching counts (one list per line)
test_path = '/Users/zplab/Desktop/VeraPythonScripts/vera_autofocus/microscope_images_5cat/test'
test_counts, class_names = count_images(test_path)
print(class_names, test_counts, sep='\n')

['0', '1', '2', '3', '4']
[292, 100, 60, 100, 348]


In the test images, there are 60 acceptable samples compared to 348 and 292 in the two most out of focus classes.

In [6]:
# Import the image processing functions and class
from image_import import process_image, de_process_image, wormDataset

# Import all needed libraries
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
# These last two are used to save info about how the training progressed
import pickle
import datetime

# Per-channel normalisation constants handed to wormDataset
# (these values match the commonly used ImageNet channel statistics;
# presumably wormDataset normalises with them - confirm in image_import)
means = [0.485, 0.456, 0.406]
stds = [0.229, 0.224, 0.225]

# Batch sizes used later when building the train and test DataLoaders
num_train = 10
num_test = 10

# Full paths to the training and test image directories
train_dir = '/Users/zplab/Desktop/VeraPythonScripts/vera_autofocus/microscope_images_5cat/train'
test_dir = '/Users/zplab/Desktop/VeraPythonScripts/vera_autofocus/microscope_images_5cat/test'

# Build one dataset per split
traindata = wormDataset(train_dir, means, stds)
testdata = wormDataset(test_dir, means, stds)

# Report the classes found in the training data
class_names = traindata.classes
print(f'Detected {len(class_names)} classes in training data')
print(class_names)

# Report how many images each dataset holds
print(f'Traindata length = {len(traindata)}, testdata length = {len(testdata)}')

Detected 5 classes in training data
['0', '1', '2', '3', '4']
Traindata length = 855, testdata length = 900


In [7]:
# Baseline loaders: plain shuffled sampling with no class-balance correction,
# kept for comparison against the balanced loaders
trainloader = torch.utils.data.DataLoader(
    traindata,
    batch_size=num_train,
    shuffle=True,
)
testloader = torch.utils.data.DataLoader(
    testdata,
    batch_size=num_test,
    shuffle=True,
)

# Report the batch sizes and the number of batches in each loader
print(f"Train batch size = {num_train}, test batch size = {num_test}")
print(f'Trainloder length = {len(trainloader)}, testloader length = {len(testloader)}')

Train batch size = 10, test batch size = 10
Trainloder length = 86, testloader length = 90


In [12]:
# Walk through every batch of the test loader and tally how often each
# class label occurs
label_dict = {}
for _, batch_labels in testloader:
    for label_tensor in batch_labels:
        # Turn the single-element label tensor into a plain Python number
        class_id = label_tensor.item()
        label_dict[class_id] = label_dict.get(class_id, 0) + 1
label_dict

{0: 292, 4: 348, 3: 100, 2: 60, 1: 100}

The code for the balance correcting sampler is heavily based on this repo:
https://github.com/ufoym/imbalanced-dataset-sampler

Basically, the sampler is an object that wraps the dataset and decides which sample indices are drawn; the resulting wormDatasetSampler(dataset) is passed to the DataLoader through its sampler argument. I re-wrote the sampler to accept the wormDataset.

In [39]:
import torch
from torch.utils import data


class wormDatasetSampler(torch.utils.data.sampler.Sampler):
    """Sample dataset indices so that every class is drawn about equally often.

    Each candidate index is given a weight of ``1 / (number of candidates
    sharing its label)``. Indices are then drawn with replacement from that
    distribution, so small classes are over-sampled and large classes are
    under-sampled.

    Arguments:
        dataset: an indexable dataset whose samples hold the class label at
            position 1, i.e. ``dataset[idx][1]`` (as the wormDataset does)
        indices (list, optional): the indices to sample from; defaults to
            every index in ``dataset``
        num_samples (int, optional): number of samples to draw per pass;
            defaults to ``len(indices)``

    To use the sampler, add an argument to the DataLoader:
    ``sampler=wormDatasetSampler(dataset)``.
    Note that shuffle must be False when a sampler is given; the random
    draw performed here already shuffles the order.
    """

    def __init__(self, dataset, indices=None, num_samples=None):
        # Honour a caller-supplied subset, otherwise use the whole dataset
        self.indices = list(range(len(dataset))) if indices is None else list(indices)

        # Number of indices produced by one pass over the sampler
        self.num_samples = len(self.indices) if num_samples is None else num_samples

        # Read every label exactly once: indexing the dataset loads the whole
        # sample, so repeating the lookup would double the start-up cost
        labels = [self._get_label(dataset, idx) for idx in self.indices]

        # Map each label to the number of candidate samples carrying it
        label_to_count = {}
        for label in labels:
            label_to_count[label] = label_to_count.get(label, 0) + 1

        # Weight for each sample: inverse of its class population
        weights = [1.0 / label_to_count[label] for label in labels]
        self.weights = torch.DoubleTensor(weights)

    def _get_label(self, dataset, idx):
        """Return the class label of ``dataset[idx]`` as a hashable value."""
        # Each sample is indexed as (image, class_label)
        sample = dataset[idx]
        label = sample[1]
        # Tensors hash by identity, so two tensors holding the same class
        # would be counted as different labels; convert tensor / numpy
        # scalars to plain Python numbers first
        if hasattr(label, 'item'):
            label = label.item()
        return label

    def __iter__(self):
        # torch.multinomial cannot draw from an empty distribution
        if self.num_samples == 0 or not self.indices:
            return iter(())

        # Draw positions into self.indices according to the class-balancing
        # weights; replacement=True lets small classes repeat
        picks = torch.multinomial(self.weights, self.num_samples, replacement=True)
        return (self.indices[i] for i in picks.tolist())

    def __len__(self):
        return self.num_samples


In [40]:
# Rebuild both loaders, this time drawing samples through wormDatasetSampler
train_sampler = wormDatasetSampler(traindata)
trainloader = torch.utils.data.DataLoader(
    traindata,
    sampler=train_sampler,
    batch_size=num_train,
    shuffle=False,
)
test_sampler = wormDatasetSampler(testdata)
testloader = torch.utils.data.DataLoader(
    testdata,
    sampler=test_sampler,
    batch_size=num_test,
    shuffle=False,
)
# shuffle stays False because a sampler is supplied; any shuffling has to be
# done by the sampler itself

# Report the batch sizes and the number of batches in each loader
print(f"Train batch size = {num_train}, test batch size = {num_test}")
print(f'Trainloader length = {len(trainloader)}, testloader length = {len(testloader)}')

Train batch size = 10, test batch size = 10
Trainloader length = 86, testloader length = 90


In [41]:
# Walk through every batch of the sampler-backed test loader and tally how
# often each class label is drawn
label_dict = {}
for _, batch_labels in testloader:
    for label_tensor in batch_labels:
        # Turn the single-element label tensor into a plain Python number
        class_id = label_tensor.item()
        label_dict[class_id] = label_dict.get(class_id, 0) + 1
label_dict

{0: 183, 2: 172, 1: 174, 3: 200, 4: 171}

Now that the data has been sampled using wormDatasetSampler, there are a similar number of samples per class and population size will not be a source of bias.

In [42]:
%%capture 
# Prevent printing out the model architecture
# Check if cuda is available, and set pytorch to run on GPU or CPU as appropriate
if torch.cuda.is_available():
    device = torch.device("cuda")
    print('Cuda available, running on GPU')
else:
    device = torch.device("cpu")
    print('Cuda is not available, running on CPU')
    # Give the user a message so they know what is going on

model = models.resnet50(pretrained=True)
#print(model) 
# Printing the model shows some of the internal layers, not expected to
# understand these but neat to see

# Freeze the pre-trained layers, no need to update featue detection
for param in model.parameters():
    param.requires_grad = False

# Get the number of features the model expects in the final fully connected layer, this is different
# in different models
num_ftrs = model.fc.in_features

# Re-define the final fully connected layer (model.fc, fc = fully connected)
model.fc = nn.Sequential(nn.Linear(num_ftrs, 512), # 2048 inputs to 512 outputs 
                                 nn.ReLU(),
                                 nn.Dropout(0.2),
                                 # The next line needs to be modified for the number of classes
                                 # in the data set. For the microscope images I currently have 
                                 # five classes, so there are 5 outputs
                                 nn.Linear(512, 5), # 512 inputs to 5 outputs
                                 nn.LogSoftmax(dim=1))
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.003)
model.to(device)

In [None]:
# Train the network
# Every `print_every` training steps the model is evaluated on the whole test
# loader and the running statistics are recorded and printed.
epochs = 2
steps = 0
running_loss = 0
print_every = 10
train_losses, test_losses, accuracy_tracker = [], [], []
for epoch in range(epochs):
    for inputs, labels in trainloader:
        steps += 1
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        # Call the module itself rather than .forward() so registered hooks run
        logps = model(inputs)
        loss = criterion(logps, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        if steps % print_every == 0:
            test_loss = 0
            accuracy = 0
            # Switch off dropout while evaluating
            model.eval()
            with torch.no_grad():
                # Separate names so the training batch is not overwritten
                for test_inputs, test_labels in testloader:
                    test_inputs = test_inputs.to(device)
                    test_labels = test_labels.to(device)
                    test_logps = model(test_inputs)
                    batch_loss = criterion(test_logps, test_labels)
                    test_loss += batch_loss.item()

                    # Convert log-probabilities back to probabilities and
                    # compare the most likely class with the true label
                    ps = torch.exp(test_logps)
                    top_p, top_class = ps.topk(1, dim=1)
                    equals = top_class == test_labels.view(*top_class.shape)
                    accuracy += equals.float().mean().item()

            # running_loss covers exactly `print_every` batches since the last
            # reset, so that is the correct divisor for both the stored and
            # the printed training loss
            mean_train_loss = running_loss / print_every
            mean_test_loss = test_loss / len(testloader)
            # Mean of the per-batch accuracies over the test loader
            mean_accuracy = accuracy / len(testloader)

            train_losses.append(mean_train_loss)
            test_losses.append(mean_test_loss)
            accuracy_tracker.append(mean_accuracy)
            print(f"Epoch {epoch+1}/{epochs}.. "
                  f"Train loss: {mean_train_loss:.3f}.. "
                  f"Test loss: {mean_test_loss:.3f}.. "
                  f"Test accuracy: {mean_accuracy:.3f}")
            running_loss = 0
            model.train()
# Saves the whole model object (not just its state_dict)
torch.save(model, 'resnet50_5cat_unbiased.pth')

Epoch 1/2.. Train loss: 2.689.. Test loss: 2.563.. Test accuracy: 0.239
