Model: RESNET50
Classes: {Bee, Goose, Snail}

In [None]:
# install dependencies
!pip install openimages

In [None]:
# imports
from pathlib import Path

import numpy as np
import torch
import torchvision
from openimages.download import download_dataset
from PIL import Image
from torch.utils.data import Dataset, DataLoader

In [None]:
# constants
# number of classes evaluated; used to size the result arrays
NUMBER_OF_SAMPLE_CLASSES = 3
# upper bound of images requested per class from the downloader
NUMBER_OF_SAMPLE_IMAGES_PER_CLASS = 350
SAMPLE_IMAGES_ROOT_DIRECTORY = "/sample_images"
# class name -> index of that class in the 1000-way ImageNet-1k output.
# NOTE: "bee" is index 309 (index 300 is "tiger beetle").
IMAGENET_CLASS_INDEX_MAPPING = {"bee": 309, "goose": 99, "snail": 113}
# decision threshold per class, in the same order as the keys of
# IMAGENET_CLASS_INDEX_MAPPING (thresholds are looked up by position)
T_VALUES = [0.5, 0.5, 0.5]

In [None]:
# define a custom dataset class

class CustomDataset(Dataset):
    """
    Dataset of every ``*.jpg`` file found below a root directory, labelled
    by the directory each image lives in.

    Attributes
    ----------
    all_images_paths: list[str]
      sorted paths of all images found (recursively) in the root directory
    images_classes_by_directory: dict[str, str]
      mapping from directory paths to their corresponding class names
    image_label_index_mapping: dict[str, int]
      mapping from class names to indices.
    transform: callable
      transformation function applied to each image
    """

    def __init__(self, root_image_directory: str, images_classes_by_directory: dict[str, str], image_label_index_mapping: dict[str, int], transform: callable) -> None:
        """
        Parameters:
        -----------
        root_image_directory: str
          root directory with subdirectories with images.
        images_classes_by_directory: dict[str, str]
          mapping from directory paths to class names
        image_label_index_mapping: dict[str, int]
          mapping from class names to integer indices
        transform: callable
          transformation applied to images.
        """

        # rglob order depends on the filesystem; sorting makes index -> image
        # reproducible between runs and machines
        self.all_images_paths: list[str] = sorted(
            str(image_path) for image_path in Path(root_image_directory).rglob("*.jpg")
        )
        self.images_classes_by_directory: dict[str, str] = images_classes_by_directory
        self.image_label_index_mapping = image_label_index_mapping
        self.transform = transform

        # Path-normalised copy of the directory mapping so that lookups do not
        # depend on trailing separators, "./" prefixes or the separator style
        self._class_by_directory: dict[Path, str] = {
            Path(directory): image_class
            for directory, image_class in images_classes_by_directory.items()
        }

    def __len__(self) -> int:
        """
        Returns the total number of images in the dataset
        """

        return len(self.all_images_paths)

    def __getitem__(self, index: int) -> tuple[torch.Tensor, int]:
        """
        Returns the image at the given index as a tuple of
        (transformed image, integer label)

        Raises
        ------
        KeyError
          if the image's directory or class name is missing from the mappings
        """

        image_path: str = self.all_images_paths[index]

        # convert() returns an independent copy, so the file can be closed
        # as soon as the conversion is done
        with Image.open(image_path) as source_image:
            image = source_image.convert(mode="RGB")

        return (self.transform(image), self._label_index_for(image_path))

    def _label_index_for(self, image_path: str) -> int:
        """
        Returns the integer label of the class whose directory contains image_path
        """

        directory = Path(image_path).parent
        try:
            image_class = self._class_by_directory[directory]
        except KeyError:
            raise KeyError(f"no class registered for image directory '{directory}'") from None

        return self.image_label_index_mapping[image_class]


In [None]:
# class to store confusion matrix per class
class ConfusionMatrix:
    """
    One-vs-rest confusion counts for a single class.

    A sample is predicted positive when its probability is strictly greater
    than the threshold, and is actually positive when its ground truth label
    equals the class label index.

    Attributes
    ----------
    class_name: str
      name of the class the counts belong to
    TP: int
      true positives
    TN: int
      true negatives
    FP: int
      false positives
    FN: int
      false negatives
    """

    def __init__(self, class_name:str, class_label_index:int, probabilities: np.ndarray, ground_truth_labels: np.ndarray, t_value: float = 0.5) -> None:
        """
        Parameters:
        -----------
        class_name: str
          name of the evaluated class
        class_label_index: int
          label value that marks a sample as belonging to the class
        probabilities: np.ndarray
          per-sample scores for the class
        ground_truth_labels: np.ndarray
          per-sample ground truth labels
        t_value: float
          threshold a score has to exceed to count as a positive prediction
        """
        self.class_name = class_name

        is_flagged = probabilities > t_value
        is_member = ground_truth_labels == class_label_index
        not_flagged = np.logical_not(is_flagged)
        not_member = np.logical_not(is_member)

        # each cell counts the samples where both masks hold
        self.TP = np.logical_and(is_flagged, is_member).sum()
        self.TN = np.logical_and(not_flagged, not_member).sum()
        self.FP = np.logical_and(is_flagged, not_member).sum()
        self.FN = np.logical_and(not_flagged, is_member).sum()


In [None]:
# class to calculate and store class statistics
class Statistics:
    """
    Accuracy, precision, recall and F1 score derived from one confusion matrix.

    Every metric is a float; a metric whose denominator is zero is reported
    as 0.0 instead of raising.

    Attributes
    ----------
    class_name: str
      name of the class, copied from the confusion matrix
    ACCURACY: float
      (TP + TN) / (TP + TN + FP + FN)
    PRECISION: float
      TP / (TP + FP)
    RECALL: float
      TP / (TP + FN)
    F1: float
      2 * PRECISION * RECALL / (PRECISION + RECALL)
    """

    def __init__(self, confusion_matrix: "ConfusionMatrix") -> None:
        """
        Parameters:
        -----------
        confusion_matrix: ConfusionMatrix
          object exposing class_name and the counts TP, TN, FP and FN
        """
        self.class_name = confusion_matrix.class_name

        true_positives = confusion_matrix.TP
        true_negatives = confusion_matrix.TN
        false_positives = confusion_matrix.FP
        false_negatives = confusion_matrix.FN

        self.ACCURACY = self._ratio(
            true_positives + true_negatives,
            true_positives + true_negatives + false_positives + false_negatives,
        )
        self.PRECISION = self._ratio(true_positives, true_positives + false_positives)
        self.RECALL = self._ratio(true_positives, true_positives + false_negatives)
        self.F1 = self._ratio(2 * (self.PRECISION * self.RECALL), self.PRECISION + self.RECALL)

    @staticmethod
    def _ratio(numerator, denominator) -> float:
        """
        Returns numerator / denominator as a float, or 0.0 when the denominator is not positive
        """
        if denominator > 0:
            return float(numerator) / float(denominator)
        return 0.0

    def __repr__(self) -> str:
        return f"Class {self.class_name} statistics:\nAccuracy: {self.ACCURACY} \nPrecision: {self.PRECISION}\nRecall: {self.RECALL}\nF1:{self.F1}\n"

In [None]:
# download images
# make sure the destination exists before anything is written into it
Path(SAMPLE_IMAGES_ROOT_DIRECTORY).mkdir(parents=True, exist_ok=True)

# the mapping keys are lowercase; they are capitalized before being passed
# as class labels, and at most NUMBER_OF_SAMPLE_IMAGES_PER_CLASS images are
# requested per class
sample_images_directories_by_class = download_dataset(
    dest_dir=SAMPLE_IMAGES_ROOT_DIRECTORY,
    class_labels=[label.capitalize() for label in IMAGENET_CLASS_INDEX_MAPPING],
    limit=NUMBER_OF_SAMPLE_IMAGES_PER_CLASS,
)

In [None]:
# image transformations for RESNET50 model
# Use the preprocessing preset that ships with the pretrained weights instead
# of a hand-written pipeline: it keeps resize, crop and normalization in sync
# with the checkpoint that DEFAULT points to (IMAGENET1K_V2 is evaluated with
# a 232-pixel resize followed by a 224-pixel center crop, not a 256-pixel resize).
# The preset accepts PIL images and returns normalized float tensors.
transform = torchvision.models.ResNet50_Weights.DEFAULT.transforms()


In [None]:
# initialize dataset
# invert the download result (class name -> directories) into
# images directory -> class name, which is what the dataset uses for labelling
images_classes_by_directory: dict[str, str] = {
    class_directories["images_dir"]: class_name
    for class_name, class_directories in sample_images_directories_by_class.items()
}

dataset: CustomDataset = CustomDataset(
    root_image_directory=SAMPLE_IMAGES_ROOT_DIRECTORY,
    images_classes_by_directory=images_classes_by_directory,
    image_label_index_mapping=IMAGENET_CLASS_INDEX_MAPPING,
    transform=transform,
)


In [None]:
# initialize dataloader
# batches of 16 shuffled samples, loaded by 4 worker processes
dataloader: DataLoader = DataLoader(
    dataset=dataset,
    batch_size=16,
    shuffle=True,
    num_workers=4,
)


In [None]:
# initialize torch device
# prefer the GPU when CUDA is usable, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


In [None]:
# initialize model
# ResNet-50 with the default pretrained weights published by torchvision
resnet_model = torchvision.models.resnet50(
    weights=torchvision.models.ResNet50_Weights.DEFAULT,
)
# move the parameters to the selected device
resnet_model.to(device=device)


In [None]:
# evaluate model
# indexes (columns of the model output) of the classes we want to evaluate for
sample_images_classes_indexes: list[int] = list(IMAGENET_CLASS_INDEX_MAPPING.values())

# Size the result buffers from the dataset itself rather than from the
# requested download limit: the number of images on disk can differ from
# the limit, which would either leave uninitialized np.empty rows in the
# statistics or overflow the buffers.
number_of_samples = len(dataloader.dataset)
sample_images_classes_probabilities = np.empty((number_of_samples, len(sample_images_classes_indexes)))
ground_truth_labels = np.empty(number_of_samples, dtype=int)
current_index = 0

resnet_model.eval()

with torch.no_grad():
    for images, labels in dataloader:
        images = images.to(device)
        # NOTE(review): sigmoid scores every logit independently (one-vs-rest);
        # confirm this is intended rather than a softmax over all classes
        probabilities = torch.sigmoid(resnet_model(images)).cpu().numpy()

        # extract probabilities only of classes we want to evaluate for
        sample_classes_probabilities = probabilities[:, sample_images_classes_indexes]
        # the last batch can be smaller than the configured batch size
        batch_size = sample_classes_probabilities.shape[0]

        # assign batch probabilities and ground truths to the complete arrays
        batch_slice = slice(current_index, current_index + batch_size)
        sample_images_classes_probabilities[batch_slice, :] = sample_classes_probabilities
        ground_truth_labels[batch_slice] = labels.cpu().numpy()

        current_index += batch_size


In [None]:
# initialize confusion matrices for each image sample class
# the position of a class in IMAGENET_CLASS_INDEX_MAPPING selects both its
# probability column and its threshold in T_VALUES
classes_confusion_matrixes = [
    ConfusionMatrix(
        class_name=name,
        class_label_index=label_index,
        probabilities=sample_images_classes_probabilities[:, position],
        ground_truth_labels=ground_truth_labels,
        t_value=T_VALUES[position],
    )
    for position, (name, label_index) in enumerate(IMAGENET_CLASS_INDEX_MAPPING.items())
]


In [None]:
# calculate statistics for each class
# one Statistics object per confusion matrix, in the same order
classes_statistics = list(map(Statistics, classes_confusion_matrixes))
print(classes_statistics)