# Important moment

This notebook was tested in Google Colab.

Set the input resolution that the classification model was trained on.

In [None]:
# Side length (in pixels) of the square model input. Later cells read this global when
# resizing images, ground-truth masks and saliency maps.
resolution_model = 224

# Connect to Drive and prepare Data

You should use gpu to get results quickly.

In [None]:
# Print the GPU status of the current runtime.
!nvidia-smi

You need to have the following structure so that you don't have to change anything in the code for working with data.

In [None]:
# Download the two dataset archives by their Google Drive file ids
# (the next cell expects them to be named images.zip and mask.zip).
!gdown 1s7sFwfBp6vP1uURATChhuxC3Zvg1kcAT
!gdown 1tIQGnL6_4ToXF1V3F4DQb0Uv2vb3wubr

In [None]:
# Unpack both archives and create one (initially empty) image folder per category.
# -o makes unzip overwrite without prompting and -p makes mkdir ignore existing
# directories, so the cell can be re-run without hanging or failing.
!unzip -o images.zip -d /content/
!unzip -o mask.zip -d /content/
!mkdir -p test_dataset/{drops,partial,full,strong}

In [None]:
import os

def make_dataset(category):
  """Move every image that has a mask in ``mask/<category>`` into ``test_dataset/<category>``.

  For each file found under ``mask/<category>`` the image with the same file
  name is moved from ``data/`` to ``test_dataset/<category>/``.

  Args:
    category: Name of the category sub-directory, e.g. ``"drops"``.

  Raises:
    OSError: Propagated from ``os.replace`` when the source image or the
      destination directory does not exist.
  """
  mask_dir = os.path.join("mask", category)
  # The walk variables get their own names (the original re-bound `root`, the
  # start directory, inside the loop), and the bare file name is used directly
  # instead of the third '/'-separated path component, which is a directory
  # name as soon as the masks are nested one level deeper.
  for _current_dir, _subdirs, files in os.walk(mask_dir):
    for file_name in files:
      os.replace(os.path.join("data", file_name),
                 os.path.join("test_dataset", category, file_name))


for _category in ("drops", "partial", "strong", "full"):
  make_dataset(_category)

In [None]:
# Rebuild data/ from scratch: the old data/ folder is deleted, test_dataset is renamed to
# images, and both images/ and mask/ are moved inside the new data/ folder.
!rm -rf data
!mkdir data
!mv test_dataset images
!mv images data/
!mv mask data/

# Download the libraries and import them

In [None]:
# %pip (instead of !pip) installs into the environment of the running kernel.
%pip install timm
%pip install grad-cam

In [None]:
import torch
import os
import cv2
import numpy as np
import sys
import json

from collections import defaultdict

from torch.utils.data import Dataset, DataLoader
from torch import nn
from torch.nn import functional as F

from torchvision.models import resnet18 as resnet18
from torchvision import models, transforms

import matplotlib.pyplot as plt
from google.colab.patches import cv2_imshow
import timm

from PIL import Image
import random
import pickle
import io
from torch.autograd import Variable

# CAMERAS

In [None]:
# Fetch the CAMERAS repository and copy its CAMERAS.py into /content/ so that a later cell
# can import it with `from CAMERAS import CAMERAS`.
!git clone https://github.com/VisMIL/CAMERAS.git
!cp -R /content/CAMERAS/CAMERAS.py /content/

In the line self.inputResolutions = list(range(**224**, 1000, 100)), change the number shown in bold to the resolution the classification model was trained on (it must match the value set in the "Important moment" section).

Copy the code below and paste it into the file that is located on the path: /content/CAMERAS.py

In [None]:
import copy

import torch
from torch.nn import functional as F

class CAMERAS():
    """Multi-resolution saliency estimation for a classification model.

    The image is passed through the model once per entry of
    ``inputResolutions``. At every scale the output of the target layer and the
    gradient of the selected class logit w.r.t. that output are recorded,
    resized to the first resolution, averaged over the scales and multiplied
    into one saliency map that is min-max normalised.

    The code calls ``.item()`` on the selected class id, so it handles one
    image (batch size 1) at a time.
    """

    def __init__(self, model, targetLayerName, inputResolutions=None):
        """Store the model and the analysis settings.

        Args:
            model: ``torch.nn.Module`` returning logits of shape (batch, classes).
            targetLayerName: Name of the layer to inspect, as listed by
                ``model.named_modules()``.
            inputResolutions: Side lengths of the square inputs to evaluate.
                The first entry is also the resolution of the returned map.
                Defaults to ``list(range(224, 1000, 100))``.
        """
        self.model = model
        self.inputResolutions = inputResolutions

        if self.inputResolutions is None:
            self.inputResolutions = list(range(224, 1000, 100)) # Update this line

        # All dictionaries below are keyed by input resolution.
        self.classDict = {}
        self.probsDict = {}
        self.featureDict = {}
        self.gradientsDict = {}
        self.targetLayerName = targetLayerName

    def _device(self):
        """Return the device of the model's first parameter (CPU if it has none)."""
        firstParameter = next(self.model.parameters(), None)
        if firstParameter is None:
            return torch.device("cpu")
        return firstParameter.device

    def _recordActivationsAndGradients(self, inputResolution, image, classOfInterest=None):
        """Run one forward/backward pass and store what the target layer saw.

        Fills ``featureDict``, ``gradientsDict``, ``classDict`` and
        ``probsDict`` for ``inputResolution``.

        Args:
            inputResolution: Key under which the results are stored.
            image: Input batch, already on the model's device.
            classOfInterest: Class index to explain; the top-1 prediction is
                used when ``None``.

        Raises:
            ValueError: If the model has no module named ``targetLayerName``.
        """
        targetModule = dict(self.model.named_modules()).get(self.targetLayerName)
        if targetModule is None:
            raise ValueError(
                "Model has no layer named {!r}".format(self.targetLayerName))

        def saveGradient(gradient):
            self.gradientsDict[inputResolution] = gradient.detach().clone().cpu()

        def forward_hook(module, input, output):
            self.featureDict[inputResolution] = output.detach().clone().cpu()
            # A tensor hook on the layer output yields the same gradient that the
            # deprecated Module.register_backward_hook exposed as grad_output[0].
            if output.requires_grad:
                output.register_hook(saveGradient)

        forwardHandle = targetModule.register_forward_hook(forward_hook)
        try:
            logits = self.model(image)
            softMaxScore = F.softmax(logits, dim=1)
            probs, classes = softMaxScore.sort(dim=1, descending=True)

            if classOfInterest is None:
                ids = classes[:, [0]]
            else:
                ids = torch.tensor(classOfInterest, device=logits.device).unsqueeze(dim=0).unsqueeze(dim=0)

            self.classDict[inputResolution] = ids.clone().detach().item()
            # Note: this is always the top-1 probability, also when classOfInterest is given.
            self.probsDict[inputResolution] = probs[0, 0].clone().detach().item()

            one_hot = torch.zeros_like(logits)
            one_hot.scatter_(1, ids, 1.0)

            self.model.zero_grad()
            logits.backward(gradient=one_hot, retain_graph=False)
        finally:
            # Always detach the hook, even if the forward or backward pass fails.
            forwardHandle.remove()

    def _estimateSaliencyMap(self, classOfInterest):
        """Fuse the recorded activations and gradients into one saliency map.

        Only resolutions whose recorded class equals the class recorded at the
        first resolution, or equals ``classOfInterest``, contribute to the mean.

        Returns:
            2-D tensor of size (first resolution, first resolution) with values
            in [0, 1]; all zeros if the map has no positive response.
        """
        saveResolution = self.inputResolutions[0]
        groundTruthClass = self.classDict[saveResolution]
        device = self._device()
        targetSize = (saveResolution, saveResolution)

        summedFeatures = None
        summedGradients = None
        # count is at least 1: the first resolution always matches its own class.
        count = 0
        for resolution in self.inputResolutions:
            recordedClass = self.classDict[resolution]
            if recordedClass == groundTruthClass or recordedClass == classOfInterest:
                count += 1
                upSampledFeatures = F.interpolate(self.featureDict[resolution].to(device), targetSize, mode='bilinear', align_corners=False)
                upSampledGradients = F.interpolate(self.gradientsDict[resolution].to(device), targetSize, mode='bilinear', align_corners=False)

                summedFeatures = upSampledFeatures if summedFeatures is None else summedFeatures + upSampledFeatures
                summedGradients = upSampledGradients if summedGradients is None else summedGradients + upSampledGradients

        fmaps = summedFeatures / count
        grads = summedGradients / count

        saliencyMap = torch.mul(fmaps, grads).sum(dim=1, keepdim=True)

        saliencyMap = F.relu(saliencyMap)
        B, C, H, W = saliencyMap.shape
        saliencyMap = saliencyMap.view(B, -1)
        saliencyMap -= saliencyMap.min(dim=1, keepdim=True)[0]
        # Guard the division: an all-zero map would otherwise become NaN (0 / 0).
        peak = saliencyMap.max(dim=1, keepdim=True)[0]
        saliencyMap /= peak.clamp_min(torch.finfo(saliencyMap.dtype).tiny)
        saliencyMap = saliencyMap.view(B, C, H, W)

        saliencyMap = torch.squeeze(torch.squeeze(saliencyMap, dim=0), dim=0)
        return saliencyMap

    def run(self, image, classOfInterest=None):
        """Compute the saliency map of ``image``.

        Args:
            image: Input batch of shape (1, C, H, W). It is used unchanged for
                the first resolution and resized (bicubic) for all others.
            classOfInterest: Optional class index to explain instead of the
                top-1 prediction.

        Returns:
            Tuple ``(saliencyMap, classDict, probsDict)``; the dictionaries map
            each resolution to the explained class id and top-1 probability.
        """
        device = self._device()
        for index, inputResolution in enumerate(self.inputResolutions):
            if index == 0:
                upSampledImage = image.to(device)
            else:
                upSampledImage = F.interpolate(image, (inputResolution, inputResolution), mode='bicubic', align_corners=False).to(device)

            self._recordActivationsAndGradients(inputResolution, upSampledImage, classOfInterest=classOfInterest)

        saliencyMap = self._estimateSaliencyMap(classOfInterest=classOfInterest)
        return saliencyMap, self.classDict, self.probsDict

The next cell does not need to be copied, just run

In [None]:
import cv2
import torch
import torchvision.models as models
import matplotlib.cm as cm
from CAMERAS import CAMERAS
from torchvision import transforms
import numpy as np

# Per-channel normalisation applied after ToTensor(); loadImage() feeds RGB images through it.
# NOTE(review): the constants look like the usual ImageNet mean/std - confirm they match training.
normalizeTransform = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
normalizeImageTransform = transforms.Compose([transforms.ToTensor(), normalizeTransform])

def loadImage(imagePath, imageSize):
    """Read an image from disk and prepare it for the model.

    The image is first resized to ``resolution_model`` (global) and then to
    ``imageSize``; the BGR array read by OpenCV is flipped to RGB before it is
    converted to a normalised tensor.

    Args:
        imagePath: Path of the image file.
        imageSize: Side length of the square output.

    Returns:
        Tuple ``(image, rawImage)``: the normalised tensor and the resized BGR array.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``imagePath``.
    """
    rawImage = cv2.imread(imagePath)
    # cv2.imread signals failure by returning None instead of raising.
    if rawImage is None:
        raise FileNotFoundError("Could not read image: {}".format(imagePath))
    rawImage = cv2.resize(rawImage, (resolution_model,) * 2, interpolation=cv2.INTER_LINEAR)
    rawImage = cv2.resize(rawImage, (imageSize,) * 2, interpolation=cv2.INTER_LINEAR)
    image = normalizeImageTransform(rawImage[..., ::-1].copy())
    return image, rawImage

def saveMapWithColorMap(filename, map, image):
    """Write a 50/50 blend of the JET-coloured saliency map and the image to ``filename``.

    Args:
        filename: Output image path.
        map: Saliency tensor with values in [0, 1]; it is scaled to 0..255.
        image: Image array to blend with; must have the shape of the coloured map.
    """
    cmap = cv2.applyColorMap(np.uint8(255 * map.detach().numpy().squeeze()), cv2.COLORMAP_JET)
    # np.float was removed in NumPy 1.24; np.float64 is the type it used to alias.
    map = (cmap.astype(np.float64) + image.astype(np.float64)) / 2
    cv2.imwrite(filename, np.uint8(map))

def computeAndSaveMaps(model, image_name):
    """Return the CAMERAS saliency map (on the CPU) of one image.

    The image is loaded at ``resolution_model`` and explained with CAMERAS on
    the layer named "layer4".
    """
    image_tensor, _raw_image = loadImage(image_name, imageSize=resolution_model)
    batch = image_tensor.unsqueeze(dim=0)

    explainer = CAMERAS(model, targetLayerName="layer4") # classic CAMERAS
    outputs = explainer.run(batch)

    # run() returns (saliency map, classes, probabilities); only the map is needed.
    return outputs[0].cpu()

# Implementation of all CAM methods used in the article, except CAMERAS and CCAM

## mGrad-CAM

The next cells do not need to be copied, just run them.

In [None]:
from pytorch_grad_cam.utils.image import show_cam_on_image, deprocess_image, preprocess_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget

In [None]:
import numpy as np
import torch
import ttach as tta
from typing import Callable, List, Tuple
from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients
from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection
from pytorch_grad_cam.utils.image import scale_cam_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget


class BaseCAM:
    """Base class for CAM explainers (local copy adapted from ``pytorch_grad_cam``).

    Subclasses implement ``get_cam_weights``; this class runs the model,
    collects activations/gradients of the target layers through
    ``ActivationsAndGradients`` and turns them into saliency maps.
    """

    def __init__(self,
                 model: torch.nn.Module,
                 target_layers: List[torch.nn.Module],
                 use_cuda: bool = False,
                 reshape_transform: Callable = None,
                 compute_input_gradient: bool = False,
                 uses_gradients: bool = True) -> None:
        """Put the model in eval mode and hook the target layers.

        Args:
            model: Network to explain.
            target_layers: Modules whose activations/gradients are used.
            use_cuda: Move the model and every input tensor to CUDA.
            reshape_transform: Passed through to ``ActivationsAndGradients``.
            compute_input_gradient: Make the input tensor require gradients.
            uses_gradients: Run a backward pass in ``forward``.
        """
        self.model = model.eval()
        self.target_layers = target_layers
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()
        self.reshape_transform = reshape_transform
        self.compute_input_gradient = compute_input_gradient
        self.uses_gradients = uses_gradients
        self.activations_and_grads = ActivationsAndGradients(
            self.model, target_layers, reshape_transform)

    def get_cam_weights(self,
                        input_tensor: torch.Tensor,
                        target_layers: List[torch.nn.Module],
                        targets: List[torch.nn.Module],
                        activations: torch.Tensor,
                        grads: torch.Tensor) -> np.ndarray:
        """ Get a vector of weights for every channel in the target layer.
            Methods that return weights channels,
            will typically need to only implement this function.

            Raises:
                NotImplementedError: always; subclasses must override it. """
        raise NotImplementedError("Not Implemented")

    def get_cam_image(self,
                      input_tensor: torch.Tensor,
                      target_layer: torch.nn.Module,
                      targets: List[torch.nn.Module],
                      activations: torch.Tensor,
                      grads: torch.Tensor,
                      eigen_smooth: bool = False) -> np.ndarray:
        """Weight the activations and reduce them over the channel axis.

        Returns the channel-wise sum of ``weights * activations`` or, with
        ``eigen_smooth``, the result of ``get_2d_projection``.
        """

        weights = self.get_cam_weights(input_tensor,
                                       target_layer,
                                       targets,
                                       activations,
                                       grads)
        weighted_activations = weights * activations
        if eigen_smooth:
            cam = get_2d_projection(weighted_activations)
        else:
            cam = weighted_activations.sum(axis=1)
        return cam

    def forward(self,
                input_tensor: torch.Tensor,
                targets: List[torch.nn.Module],
                eigen_smooth: bool = False) -> np.ndarray:
        """Run the model on ``input_tensor`` and return the aggregated CAM.

        When ``targets`` is None, the arg-max class of every sample is explained.
        """

        if self.cuda:
            input_tensor = input_tensor.cuda()

        if self.compute_input_gradient:
            input_tensor = torch.autograd.Variable(input_tensor,
                                                   requires_grad=True)

        outputs = self.activations_and_grads(input_tensor)
        if targets is None:
            target_categories = np.argmax(outputs.cpu().data.numpy(), axis=-1)
            targets = [ClassifierOutputTarget(category) for category in target_categories]

        if self.uses_gradients:
            self.model.zero_grad()
            loss = sum([target(output) for target, output in zip(targets, outputs)])
            loss.backward(retain_graph=True)

        # In most of the saliency attribution papers, the saliency is
        # computed with a single target layer.
        # Commonly it is the last convolutional layer.
        # Here we support passing a list with multiple target layers.
        # It will compute the saliency image for every image,
        # and then aggregate them (with a default mean aggregation).
        # This gives you more flexibility in case you just want to
        # use all conv layers for example, all Batchnorm layers,
        # or something else.
        cam_per_layer = self.compute_cam_per_layer(input_tensor,
                                                   targets,
                                                   eigen_smooth)
        return self.aggregate_multi_layers(cam_per_layer)

    def get_target_width_height(self,
                                input_tensor: torch.Tensor) -> Tuple[int, int]:
        """Return ``(width, height)`` taken from the last two input dimensions."""
        width, height = input_tensor.size(-1), input_tensor.size(-2)
        return width, height

    def compute_cam_per_layer(
            self,
            input_tensor: torch.Tensor,
            targets: List[torch.nn.Module],
            eigen_smooth: bool) -> np.ndarray:
        """Build one CAM per target layer, clipped at zero and scaled to the input size."""
        activations_list = [a.cpu().data.numpy()
                            for a in self.activations_and_grads.activations]
        grads_list = [g.cpu().data.numpy()
                      for g in self.activations_and_grads.gradients]
        target_size = self.get_target_width_height(input_tensor)

        cam_per_target_layer = []
        # Loop over the saliency image from every layer
        for i in range(len(self.target_layers)):
            target_layer = self.target_layers[i]
            layer_activations = None
            layer_grads = None
            if i < len(activations_list):
                layer_activations = activations_list[i]
            if i < len(grads_list):
                layer_grads = grads_list[i]

            cam = self.get_cam_image(input_tensor,
                                     target_layer,
                                     targets,
                                     layer_activations,
                                     layer_grads,
                                     eigen_smooth)
            cam = np.maximum(cam, 0)
            scaled = scale_cam_image(cam, target_size)
            cam_per_target_layer.append(scaled[:, None, :])

        return cam_per_target_layer

    def aggregate_multi_layers(self, cam_per_target_layer: np.ndarray) -> np.ndarray:
        """Average the per-layer CAMs and rescale the result with ``scale_cam_image``."""
        cam_per_target_layer = np.concatenate(cam_per_target_layer, axis=1)
        cam_per_target_layer = np.maximum(cam_per_target_layer, 0)
        result = np.mean(cam_per_target_layer, axis=1)
        return scale_cam_image(result)

    def forward_augmentation_smoothing(self,
                                       input_tensor: torch.Tensor,
                                       targets: List[torch.nn.Module],
                                       eigen_smooth: bool = False) -> np.ndarray:
        """Average the CAMs of several test-time augmentations (flip, brightness)."""
        transforms = tta.Compose(
            [
                tta.HorizontalFlip(),
                tta.Multiply(factors=[0.9, 1, 1.1]),
            ]
        )
        cams = []
        for transform in transforms:
            augmented_tensor = transform.augment_image(input_tensor)
            cam = self.forward(augmented_tensor,
                               targets,
                               eigen_smooth)

            # The ttach library expects a tensor of size BxCxHxW
            cam = cam[:, None, :, :]
            cam = torch.from_numpy(cam)
            cam = transform.deaugment_mask(cam)

            # Back to numpy float32, HxW
            cam = cam.numpy()
            cam = cam[:, 0, :, :]
            cams.append(cam)

        cam = np.mean(np.float32(cams), axis=0)
        return cam

    def __call__(self,
                 input_tensor: torch.Tensor,
                 targets: List[torch.nn.Module] = None,
                 aug_smooth: bool = False,
                 eigen_smooth: bool = False) -> np.ndarray:
        """Compute the CAM, optionally smoothed with test-time augmentation."""

        # Smooth the CAM result with test time augmentation
        if aug_smooth is True:
            return self.forward_augmentation_smoothing(
                input_tensor, targets, eigen_smooth)

        return self.forward(input_tensor,
                            targets, eigen_smooth)

    def __del__(self):
        # __init__ may have failed before the hooks were created (e.g. model.cuda()
        # raising), in which case there is nothing to release.
        hooks = getattr(self, "activations_and_grads", None)
        if hooks is not None:
            hooks.release()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        """Release the hooks; an ``IndexError`` from the with-block is printed and suppressed."""
        self.activations_and_grads.release()
        if isinstance(exc_value, IndexError):
            # Handle IndexError here...
            print(
                f"An exception occurred in CAM with block: {exc_type}. Message: {exc_value}")
            return True

In [None]:
import numpy as np

class GradCAM(BaseCAM):
    """CAM variant of the "mGrad-CAM" section: the gradients themselves are the weights.

    ``get_cam_weights`` returns the gradients unchanged, so ``BaseCAM``
    multiplies activations and gradients element by element.
    """

    def __init__(self, model, target_layers, use_cuda=False,
                 reshape_transform=None):
        super().__init__(model, target_layers, use_cuda, reshape_transform)

    def get_cam_weights(self,
                        input_tensor,
                        target_layer,
                        target_category,
                        activations,
                        grads):
        """Return ``grads`` as the weights; every other argument is ignored."""
        return grads

## LayerCAM, Grad-CAM, FullGrad and others

The next cell does not need to be copied, just run

In [None]:
from pytorch_grad_cam import GradCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM, LayerCAM, FullGrad
from pytorch_grad_cam.utils.image import show_cam_on_image, deprocess_image, preprocess_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget

# Unsupervised segmentation pipeline

CAMERAS:

To use the CAMERAS method, you need to follow all the instructions from the text and activate all the cells in the section "CAMERAS".

---

Grad-CAM, mGrad-CAM and etc:

To use mGrad-CAM: first go to the section "Implementation of all CAM methods used in the article, except CAMERAS and CCAM", open the subsection "mGrad-CAM" and run all of its cells; then return to this section and make sure that GradCAM is selected in the function "get_saliency_map".

---

To use other methods (the exceptions are CAMERAS and mGrad-CAM) -> first go to the section "Implementation of all CAM methods used in the article, except CAMERAS and CCAM", go to the subsection "LayerCAM, Grad-CAM, FullGrad and others" execute a single cell, then return to this section and set in the function "get_saliency_map" which CAM will we use.

---

If you switch between methods, for example in this sequence:

1. Grad-CAM;
2. mGrad-CAM;
3. Layer-CAM.

Then be sure to do what is described above every time and for full confidence you can restart the environment.

If there is no mGrad-CAM in the sequence, then it is enough to do it once and then change the CAM method in get_saliency_map.

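model.layerN[-1] - the last block of stage N (written as "N.2" in the method names, e.g. "4.2");

model.layerN[-2] - the second-to-last block of stage N, which is the first block in ResNet-18 (written as "N.1", e.g. "4.1").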

In [None]:
def get_saliency_map(model, image_name):
  """Compute the CAM of one image with the currently selected CAM class.

  The image is resized to ``resolution_model``, normalised and sent to the
  device the model lives on. Without explicit targets the CAM class explains
  the top-scoring class.

  Args:
    model: Classification network exposing ``layer3`` and ``layer4``.
    image_name: Path of the image file.

  Returns:
    2-D array: the CAM of the single image in the batch.
  """
  # From which layers will the class activation maps be taken
  target_layers = [model.layer3[-1], model.layer4[-2]]

  preprocess = transforms.Compose([
    transforms.Resize((resolution_model, resolution_model)),
    transforms.ToTensor(),
    transforms.Normalize(
      mean=[0.485, 0.456, 0.406],
      std=[0.229, 0.224, 0.225]
    )
  ])

  # load test image (the context manager closes the file handle right away)
  with Image.open(image_name) as image_file:
    img_pil = image_file.convert('RGB')

  # Follow the model's device instead of hard-coding CUDA; the deprecated
  # torch.autograd.Variable wrapper is no longer needed.
  device = next(model.parameters()).device
  input_tensor = preprocess(img_pil).unsqueeze(0).to(device)

  # This specifies which CAM method will be used.
  # use_cuda is not passed: the model and the input already share a device, and
  # newer pytorch-grad-cam releases are believed to have dropped that argument.
  cam = GradCAM(model=model, target_layers=target_layers)

  grayscale_cam = cam(input_tensor=input_tensor)
  # In this example grayscale_cam has only one image in the batch:
  return grayscale_cam[0, :]

In [None]:
def iou_pytorch(outputs: torch.Tensor, labels: torch.Tensor):
    """Intersection over union of two mask batches, pooled over the whole batch.

    Both tensors are cast to int and combined bit-wise; the summed intersection
    is divided by the summed union, with a small epsilon on both sides so that
    two empty batches give 1 instead of 0/0.

    Args:
        outputs: Predicted masks of shape (batch, H, W).
        labels: Ground-truth masks of the same shape.

    Returns:
        0-dimensional tensor holding the IoU.
    """
    eps = 1e-8
    predicted_bits = outputs.int()
    truth_bits = labels.int()

    # Per-image sums first, then pooled over the batch.
    overlap = torch.bitwise_and(predicted_bits, truth_bits).float().sum((1, 2))
    combined = torch.bitwise_or(predicted_bits, truth_bits).float().sum((1, 2))

    return (overlap.sum() + eps) / (combined.sum() + eps)

In [None]:
def get_masks(saliency_map, path_to_image, coefficient = 10, resolution = 224, visualization = False):
  """Threshold a saliency map into a binary mask and load the matching ground truth.

  The map is coloured with the JET colormap and converted to HSV. Pixels inside
  the range [coefficient, 50, 50]..[130, 255, 255] are treated as background;
  every other pixel becomes foreground (255) in the predicted mask.

  Args:
    saliency_map: 2-D saliency map with values in [0, 1] (numpy array or torch tensor).
    path_to_image: Image path of the form ``data/images/<category>/<file>``; the
      ground truth is read from ``data/mask/<category>/<file stem>.png``.
    coefficient: Lower hue bound of the background range.
    resolution: Side length the ground-truth mask is resized to.
    visualization: Plot the saliency overlay and the masked image.

  Returns:
    Tuple ``(real_mask, mask_tensor)`` of float tensors holding 0/255 values.

  Raises:
    FileNotFoundError: If the ground-truth mask cannot be read.
  """
  if not isinstance(saliency_map, np.ndarray):
    saliency_map = saliency_map.detach().numpy().squeeze()
  saliency_map = cv2.applyColorMap(np.uint8(255 * saliency_map), cv2.COLORMAP_JET)

  hsv = cv2.cvtColor(saliency_map, cv2.COLOR_BGR2HSV)

  # Here we define the blue color range in HSV
  lower_blue = np.array([coefficient,50,50])
  upper_blue = np.array([130,255,255])

  # inRange marks the pixels inside the range with 255; inverting it in one
  # vectorised call replaces the former per-pixel Python loop.
  mask = cv2.bitwise_not(cv2.inRange(hsv, lower_blue, upper_blue))

  if visualization:
    image = cv2.imread(path_to_image)
    # The mask has the size of the saliency map; bring the photo to that size so
    # that boolean indexing and blending do not fail on a shape mismatch.
    if image.shape[:2] != mask.shape[:2]:
      image = cv2.resize(image, (mask.shape[1], mask.shape[0]), interpolation=cv2.INTER_LINEAR)

    img = image.copy()
    img[mask == 255] = 0
    color_channeled_image = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
    result = color_channeled_image * 0.4 + img * 0.7
    mask_to_seg = saliency_map * 0.5 + image * 0.5

    fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(16, 16))
    ax1.set_title('Original picture')
    ax2.set_title('The mask was created using CAM methods')
    ax1.axis('off')
    ax2.axis('off')
    _ = ax1.imshow(cv2.cvtColor(mask_to_seg.astype("uint8"), cv2.COLOR_BGR2RGB))
    _ = ax2.imshow(cv2.cvtColor(result.astype("uint8"), cv2.COLOR_BGR2RGB))

  file_name = path_to_image.split('/')[-1]
  real_mask_path = "data/mask/"+ path_to_image.split('/')[2] + "/" + file_name.split('.')[0] + ".png"
  # Second argument - gives Grey Scale Image
  real_mask = cv2.imread(real_mask_path, 0)
  if real_mask is None:
    raise FileNotFoundError("Could not read ground-truth mask: " + real_mask_path)
  real_mask = cv2.resize(real_mask, (resolution, resolution), interpolation=cv2.INTER_NEAREST)

  # Binarise: every non-zero pixel becomes 255.
  real_mask = np.where(real_mask > 0, 255, 0).astype(np.uint8)

  real_mask = torch.Tensor(real_mask)
  mask_tensor = torch.Tensor(mask)

  return real_mask, mask_tensor

In [None]:
def get_iou_category(category, model, coefficient):
  """Return the IoU between CAM-derived masks and ground truth for one category.

  Every file below ``data/images/<category>`` is turned into a saliency map and
  thresholded with ``coefficient``; the IoU is pooled over all images.

  Args:
    category: Name of the category folder.
    model: Classification network handed to the CAM method.
    coefficient: Threshold passed on to ``get_masks``.

  Returns:
    The IoU as a Python float.

  Raises:
    ValueError: If the category folder contains no files.
  """
  gt_list = []
  pred_list = []
  image_dir = os.path.join("data/images", category)
  # The walk variables get their own names; the original re-bound `root` here.
  for current_dir, _subdirs, files in os.walk(image_dir):
    for file in files:
      path_to_image = os.path.join(current_dir, file)

      # If CAMERAS, then use computeAndSaveMaps() function, the rest CAM - get_saliency_map()
      map_to_mask = get_saliency_map(model, path_to_image)
      #map_to_mask = computeAndSaveMaps(model, path_to_image)

      gt_one, pred_one = get_masks(map_to_mask, path_to_image, coefficient=coefficient, resolution=resolution_model)
      gt_list.append(gt_one)
      pred_list.append(pred_one)

  # torch.stack fails with an unhelpful message on an empty list.
  if not gt_list:
    raise ValueError("No images found in " + image_dir)

  gt_batch = torch.stack(gt_list)
  pred_batch = torch.stack(pred_list)
  return iou_pytorch(pred_batch, gt_batch).item()

When coefficient > 120, regardless of the method, it returns a mask where the entire area is filled in.

Select the coefficient using the for loop in the code below.

cam_method_name - the variable responsible for the name of the method, **it must be changed**, since a dictionary with the values IoU and Coefficient is formed from this name.

Change the CAM class and the layers that will be rendered in the function get_saliency_map(..), which is located at the beginning of the section "Unsupervised segmentation pipeline".

Changes for CAMERAS need to be made in the function computeAndSaveMaps(), which is located in the section "CAMERAS".

Example: cam_method_name = 'mGradCAM-3.2and4.1'

In [None]:
cam_method_name = 'mGradCAM-3.2and4.1'

category_name = 'drops'

# Load the fine-tuned model. load_state_dict (strict by default) replaces every
# parameter and buffer, so neither the ImageNet weights nor a custom init of the
# new head are needed - this avoids a pointless download.
model = resnet18(pretrained=False)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 7) # second parameter - number of classes
model.load_state_dict(torch.load('ResNet18_Unfreeze3&4_4aug140epoch_model.pth')) # path to weights
model.eval()
model = model.cuda()

category_classes = [category_name]
# Best IoU seen so far and the coefficient that produced it, per category.
category_best_iou = {name: 0 for name in category_classes}
category_best_coefficient = {name: 0 for name in category_classes}

for coefficient in range(1,100):
  print("Coefficient: ", coefficient)
  for class_name in category_classes:
    value_category_iou = get_iou_category(class_name, model, coefficient=coefficient)

    # Strict comparison: on ties the smallest coefficient is kept.
    if category_best_iou[class_name] < value_category_iou:
      category_best_iou[class_name] = value_category_iou
      category_best_coefficient[class_name] = coefficient

# Saving the results
with open(cam_method_name + '_best_iou', 'w') as f:
  json.dump(category_best_iou, f)

with open(cam_method_name + '_best_coefficient', 'w') as f:
  json.dump(category_best_coefficient, f)

Execute the following cell if you want to save to google drive

In [None]:
import shutil

# Copy both result files to Google Drive. shutil.copy raises when a file or the
# target folder is missing, whereas os.system('cp ...') failed silently and broke
# on method names containing shell metacharacters.
shutil.copy(cam_method_name + '_best_iou', 'gdrive/MyDrive/CaUS/CAM_IoU/')
shutil.copy(cam_method_name + '_best_coefficient', 'gdrive/MyDrive/CaUS/CAM_Coefficient/')

## Creating masks with the best coefficients

If you received the results in the last session and want to get their masks, transfer the data from Google drive using the following cell

In [None]:
# Copy the previously saved result folders from Google Drive into /content/.
!cp -R gdrive/MyDrive/CaUS/CAM_IoU/ /content/
!cp -R gdrive/MyDrive/CaUS/CAM_Coefficient/ /content/

In [None]:
def get_masks(saliency_map, path_to_image, coefficient = 10, resolution = 224, path_to_dir = '', visualization=False):
  """Threshold a saliency map, load the ground truth and save both as PNG files.

  Note: this definition replaces the ``get_masks`` defined earlier in the notebook.

  The map is coloured with the JET colormap and converted to HSV. Pixels inside
  the range [coefficient, 50, 50]..[130, 255, 255] are treated as background;
  every other pixel becomes foreground (255) in the predicted mask. The
  predicted mask, the ground truth and the coloured map are written to
  ``<path_to_dir>/<file name up to the first '_'>/`` as ``pred_*``, ``gt_*``
  and ``map_*``.

  Args:
    saliency_map: 2-D saliency map with values in [0, 1] (numpy array or torch tensor).
    path_to_image: Image path of the form ``data/images/<category>/<file>``; the
      ground truth is read from ``data/mask/<category>/<file stem>.png``.
    coefficient: Lower hue bound of the background range.
    resolution: Side length the ground-truth mask is resized to.
    path_to_dir: Root folder for the written PNG files.
    visualization: Plot the saliency overlay and the masked image.

  Returns:
    Tuple ``(real_mask, mask_tensor)`` of float tensors holding 0/255 values.

  Raises:
    FileNotFoundError: If the ground-truth mask cannot be read.
    OSError: If one of the PNG files cannot be written.
  """
  if not isinstance(saliency_map, np.ndarray):
    saliency_map = saliency_map.detach().numpy().squeeze()
  saliency_map = cv2.applyColorMap(np.uint8(255 * saliency_map), cv2.COLORMAP_JET)

  hsv = cv2.cvtColor(saliency_map, cv2.COLOR_BGR2HSV)

  # Here we define the blue color range in HSV
  lower_blue = np.array([coefficient,50,50])
  upper_blue = np.array([130,255,255])

  # inRange marks the pixels inside the range with 255; inverting it in one
  # vectorised call replaces the former per-pixel Python loop.
  mask = cv2.bitwise_not(cv2.inRange(hsv, lower_blue, upper_blue))

  if visualization:
    image = cv2.imread(path_to_image)
    # The mask has the size of the saliency map; bring the photo to that size so
    # that boolean indexing and blending do not fail on a shape mismatch.
    if image.shape[:2] != mask.shape[:2]:
      image = cv2.resize(image, (mask.shape[1], mask.shape[0]), interpolation=cv2.INTER_LINEAR)

    img = image.copy()
    img[mask == 255] = 0
    color_channeled_image = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
    result = color_channeled_image * 0.4 + img * 0.7
    mask_to_seg = saliency_map * 0.5 + image * 0.5

    fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(16, 16))
    ax1.set_title('Original picture')
    ax2.set_title('The mask was created using CAM methods')
    ax1.axis('off')
    ax2.axis('off')
    _ = ax1.imshow(cv2.cvtColor(mask_to_seg.astype("uint8"), cv2.COLOR_BGR2RGB))
    _ = ax2.imshow(cv2.cvtColor(result.astype("uint8"), cv2.COLOR_BGR2RGB))

  file_name = path_to_image.split('/')[-1]
  file_stem = file_name.split('.')[0]
  real_mask_path = "data/mask/"+ path_to_image.split('/')[2] + "/" + file_stem + ".png"
  # Second argument - gives Grey Scale Image
  real_mask = cv2.imread(real_mask_path, 0)
  if real_mask is None:
    raise FileNotFoundError("Could not read ground-truth mask: " + real_mask_path)
  real_mask = cv2.resize(real_mask, (resolution, resolution), interpolation=cv2.INTER_NEAREST)

  # Binarise: every non-zero pixel becomes 255.
  real_mask = np.where(real_mask > 0, 255, 0).astype(np.uint8)

  path_for_class_dir = path_to_dir + '/' + file_name.split('_')[0]
  # Creates missing parents and tolerates an existing folder; the former bare
  # `except: pass` also hid real failures such as a missing parent directory.
  os.makedirs(path_for_class_dir, exist_ok=True)

  outputs = (('pred_', mask), ('gt_', real_mask), ('map_', saliency_map))
  for prefix, array in outputs:
    target = path_for_class_dir + '/' + prefix + file_stem + '.png'
    # cv2.imwrite reports failure through its return value only.
    if not cv2.imwrite(target, array):
      raise OSError("Could not write " + target)

  real_mask = torch.Tensor(real_mask)
  mask_tensor = torch.Tensor(mask)

  return real_mask, mask_tensor

In [None]:
def get_saliency_map(model, image_name, cam_method_name):
  """Compute the CAM of one image with the method and layers encoded in ``cam_method_name``.

  Note: this definition replaces the two-argument ``get_saliency_map`` defined
  earlier in the notebook.

  Args:
    model: Classification network exposing ``layer3`` and/or ``layer4``.
    image_name: Path of the image file.
    cam_method_name: ``"<method>-<layers>"``, e.g. ``"LayerCAM-3.2and4.1"``.
      ``<method>`` is GradCAM, mGradCAM or LayerCAM. ``<layers>`` joins the
      tokens 3.1, 3.2, 4.1, 4.2 with "and"; "N.1" selects ``model.layerN[-2]``
      and "N.2" selects ``model.layerN[-1]``.

  Returns:
    2-D array: the CAM of the single image in the batch.

  Raises:
    ValueError: If the name has no layer part, or names an unknown layer or method.
  """
  name_parts = cam_method_name.split('-')
  if len(name_parts) < 2:
    raise ValueError("Expected '<method>-<layers>', got: " + repr(cam_method_name))
  method_key, layer_spec = name_parts[0], name_parts[1]

  # Token -> (stage attribute, block index). Resolved lazily so that only the
  # requested stages have to exist on the model.
  block_lookup = {
    '3.1': ('layer3', -2),
    '3.2': ('layer3', -1),
    '4.1': ('layer4', -2),
    '4.2': ('layer4', -1),
  }
  target_layers = []
  for token in layer_spec.split('and'):
    if token not in block_lookup:
      raise ValueError("Unknown layer " + repr(token) + " in " + repr(cam_method_name))
    stage_name, block_index = block_lookup[token]
    target_layers.append(getattr(model, stage_name)[block_index])

  preprocess = transforms.Compose([
    transforms.Resize((resolution_model,resolution_model)),
    transforms.ToTensor(),
    transforms.Normalize(
      mean=[0.485, 0.456, 0.406],
      std=[0.229, 0.224, 0.225]
    )
  ])

  # load test image (the context manager closes the file handle right away)
  with Image.open(image_name) as image_file:
    img_pil = image_file.convert('RGB')

  # Follow the model's device instead of hard-coding CUDA; the deprecated
  # torch.autograd.Variable wrapper is no longer needed.
  device = next(model.parameters()).device
  input_tensor = preprocess(img_pil).unsqueeze(0).to(device)

  # This specifies which CAM method will be used. The classes are looked up only
  # in the branch that needs them, because LayerCAM may not have been imported.
  # use_cuda is not passed: the model and the input already share a device, and
  # newer pytorch-grad-cam releases are believed to have dropped that argument.
  if method_key == 'GradCAM' or method_key == 'mGradCAM':
    cam = GradCAM(model=model, target_layers=target_layers)
  elif method_key == 'LayerCAM':
    cam = LayerCAM(model=model, target_layers=target_layers)
  else:
    raise ValueError("Unknown CAM method " + repr(method_key) + " in " + repr(cam_method_name))

  grayscale_cam = cam(input_tensor=input_tensor)
  # In this example grayscale_cam has only one image in the batch:
  return grayscale_cam[0, :]

In [None]:
def get_iou_category(category, model, coefficient, cam_method_name, path_to_dir):
  """Export the masks of every image of one category and return their IoU.

  Note: this definition replaces the three-argument ``get_iou_category``
  defined earlier in the notebook.

  The saliency method now follows ``cam_method_name`` instead of a hand-edited
  comment: names starting with "CAMERAS" use ``computeAndSaveMaps``, all
  others go through ``get_saliency_map(model, path, cam_method_name)``.

  Args:
    category: Name of the folder below ``data/images``.
    model: Classification network handed to the CAM method.
    coefficient: Threshold passed on to ``get_masks``.
    cam_method_name: Method name such as "CAMERAS-classic" or "LayerCAM-4.2".
    path_to_dir: Root folder in which ``get_masks`` stores the PNG files.

  Returns:
    The IoU pooled over all images as a float, or None if the folder is empty.
  """
  use_cameras = cam_method_name.split('-')[0] == 'CAMERAS'
  gt_list = []
  pred_list = []
  image_dir = os.path.join("data/images", category)
  # The walk variables get their own names; the original re-bound `root` here.
  for current_dir, _subdirs, files in os.walk(image_dir):
    for file in files:
      path_to_image = os.path.join(current_dir, file)

      if use_cameras:
        map_to_mask = computeAndSaveMaps(model, path_to_image)
      else:
        map_to_mask = get_saliency_map(model, path_to_image, cam_method_name)

      gt_one, pred_one = get_masks(map_to_mask, path_to_image, coefficient=coefficient, resolution=resolution_model, path_to_dir=path_to_dir)
      gt_list.append(gt_one)
      pred_list.append(pred_one)

  if not gt_list:
    return None
  return iou_pytorch(torch.stack(pred_list), torch.stack(gt_list)).item()

In [None]:
# List the saved best-coefficient files; their names can be pasted into
# name_dict_list in the next cell.
import os
absolute_path_dir = "CAM_Coefficient/"
absolute_dir_names = os.listdir(absolute_path_dir)

print(absolute_dir_names)

['GradCAM-4.1and4.2_best_coefficient', 'LayerCAM-3.1and4.1_best_coefficient', 'LayerCAM-4.1and4.2_best_coefficient', 'mGradCAM-4.1and4.2_best_coefficient', 'mGradCAM-3.1and3.2and4.1and4.2_best_coefficient', 'mGradCAM-3.1and4.1_best_coefficient', 'GradCAM-3.2and4.2_best_coefficient', 'LayerCAM-3.1and3.2and4.1and4.2_best_coefficient', 'mGradCAM-3.2and4.1_best_coefficient', 'LayerCAM-3.2and4.1and4.2_best_coefficient', 'CAMERAS-custom-res_best_coefficient', 'LayerCAM-3.2and4.1_best_coefficient', 'LayerCAM-4.2_best_coefficient', 'mGradCAM-3.1and4.2_best_coefficient', 'LayerCAM-3.1and4.2_best_coefficient', 'CAMERAS-classic_best_coefficient', 'LayerCAM-3.2and4.2_best_coefficient', 'GradCAM-4.2_best_coefficient', 'mGradCAM-3.2and4.2_best_coefficient']


In [None]:
# GradCAM and LayerCAM
# name_dict_list = ['GradCAM-4.1and4.2_best_coefficient', 'LayerCAM-4.1and4.2_best_coefficient', 'GradCAM-4.2_best_coefficient', 'LayerCAM-3.2and4.1_best_coefficient', 'LayerCAM-3.1and3.2and4.1and4.2_best_coefficient', 'LayerCAM-4.2_best_coefficient', 'GradCAM-3.2and4.2_best_coefficient', 'LayerCAM-3.2and4.1and4.2_best_coefficient', 'LayerCAM-3.2and4.2_best_coefficient', 'LayerCAM-3.1and4.2_best_coefficient', 'LayerCAM-3.1and4.1_best_coefficient']

# mGradCAM
# name_dict_list = ['mGradCAM-4.1and4.2_best_coefficient', 'mGradCAM-3.1and3.2and4.1and4.2_best_coefficient', 'mGradCAM-3.1and4.1_best_coefficient', 'mGradCAM-3.2and4.1_best_coefficient', 'mGradCAM-3.1and4.2_best_coefficient', 'mGradCAM-3.2and4.2_best_coefficient']

# CAMERAS classic
# name_dict_list = ['CAMERAS-classic_best_coefficient']

# CAMERAS custom
name_dict_list = ['CAMERAS-custom-res_best_coefficient']

In [None]:
# -p keeps the cell re-runnable: an existing paper/ folder is not an error.
!mkdir -p paper

In [None]:
# Load the pre-trained model
model = resnet18(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 7) # second parameter - number of classes
torch.nn.init.xavier_normal_(model.fc.weight)
model.load_state_dict(torch.load('ResNet18_Unfreeze3&4_4aug140epoch_model.pth')) # path to weights
model.eval()
model = model.cuda()

category_name = 'drops'

cam_method_list = []
category_classes = [category_name]

for name_dict in name_dict_list:
  cam_method_list.append(name_dict.split('_')[0])

for cam_method_name in cam_method_list:
  path_to_cam_method_name = 'paper/' + cam_method_name

  with open('CAM_Coefficient/' + cam_method_name + '_best_coefficient', 'r') as f:
    category_best_coefficient = json.load(f)
  
  try:
    os.mkdir(path_to_cam_method_name)
  except:
    pass
  
  for class_name in category_classes:
    coefficient = category_best_coefficient.get(class_name)
    value_category_iou = get_iou_category(class_name, model, coefficient=coefficient,cam_method_name = cam_method_name,path_to_dir=path_to_cam_method_name)

In [None]:
# Pack the exported masks into a single archive.
!zip -r paper.zip paper/