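## Generating Grad-CAM visualisations for surprisingness estimation

This notebook loads the MobileNet-based surprise estimation model and saves class-activation-map overlays (computed with `AblationCAM` from the `grad-cam` package) for the first ten AI-generated (SDXL) test images.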

In [9]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Gather the full path of every file found (recursively) under the AI test folder.
all_test_images_ai = [
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input/mumu-image-classification-album/mumu-images-classification/test/ai')
    for name in names
]

# Same traversal for the human test folder.
all_test_images_human = [
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input/mumu-image-classification-album/mumu-images-classification/test/human')
    for name in names
]
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [10]:
# Order the AI image paths by file name, with any '_SDXL.jpg' suffix removed from the sort key.
# NOTE(review): the listing printed below this cell shows names ending in 'SDXL.jpg' with no
# underscore; if that rendering is accurate the replace() is a no-op -- confirm against the dataset.
all_test_images_ai[:] = sorted(
    all_test_images_ai,
    key=lambda path: path.rsplit('/', 1)[-1].replace('_SDXL.jpg', ''),
)
all_test_images_ai[0:5]

['/kaggle/input/mumu-image-classification-album/mumu-images-classification/test/ai/21yQYShp34LSDXL.jpg',
 '/kaggle/input/mumu-image-classification-album/mumu-images-classification/test/ai/313322Q12GLSDXL.jpg',
 '/kaggle/input/mumu-image-classification-album/mumu-images-classification/test/ai/314SCTT53VLSDXL.jpg',
 '/kaggle/input/mumu-image-classification-album/mumu-images-classification/test/ai/316Q04B445LSDXL.jpg',
 '/kaggle/input/mumu-image-classification-album/mumu-images-classification/test/ai/317QRZJXXALSDXL.jpg']

In [11]:
# Order the human image paths by file name, with '.jpg' removed from the sort key.
all_test_images_human[:] = sorted(
    all_test_images_human,
    key=lambda path: path.rsplit('/', 1)[-1].replace('.jpg', ''),
)
all_test_images_human[0:5]

['/kaggle/input/mumu-image-classification-album/mumu-images-classification/test/human/21yQYShp34L.jpg',
 '/kaggle/input/mumu-image-classification-album/mumu-images-classification/test/human/313322Q12GL.jpg',
 '/kaggle/input/mumu-image-classification-album/mumu-images-classification/test/human/314SCTT53VL.jpg',
 '/kaggle/input/mumu-image-classification-album/mumu-images-classification/test/human/316Q04B445L.jpg',
 '/kaggle/input/mumu-image-classification-album/mumu-images-classification/test/human/317QRZJXXAL.jpg']

## Load model

In [12]:
from typing import Optional, List
from PIL import Image
from io import BytesIO
import numpy as np
from glob import glob
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torchvision import transforms

# Location of the trained surprise-estimation weights (a `.pt` file that is read with torch.load).
MODEL_PATH = "/kaggle/input/spectrogrand-public-release/kaggle-public-release/mobilenet_surprise_estimation.pt"
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
TORCH_DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"


# Load the surprise estimation pipeline
class CreativeNet(nn.Module):
    """MobileNetV3-Small backbone followed by a small fully-connected head.

    The backbone is created with ImageNet weights and all of its parameters are
    frozen; when ``train_baseline_classifier`` is true, the parameters of the
    backbone's ``classifier`` sub-module are made trainable again. The head maps
    the backbone's 1000 outputs through 128 and 32 units to
    ``num_output_classes`` values, with leaky ReLU and dropout in between.

    Args:
        train_baseline_classifier: Whether the backbone's classifier parameters
            should require gradients.
        num_output_classes: Width of the final linear layer.
        dropout_rate: Dropout probability used after the first two head layers.
    """

    def __init__(self, train_baseline_classifier = False, num_output_classes = 2, dropout_rate = 0.20):
        super().__init__()

        # Set instance variables
        self.train_baseline_classifier = train_baseline_classifier
        self.num_output_classes = num_output_classes
        # Misspelled name kept as an alias so existing code reading it keeps working.
        self.num_outuput_classes = num_output_classes
        self.dropout_rate = dropout_rate

        # Device that was preferred at construction time. Kept for backward
        # compatibility; forward() no longer relies on it (see below).
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        # Baseline: MobileNet V3 small
        self.baseline = models.mobilenet_v3_small(weights = models.MobileNet_V3_Small_Weights.IMAGENET1K_V1)

        # Freeze the parameters of the base model (including but not limited to the last layers)
        for param in self.baseline.parameters():
            param.requires_grad = False

        if self.train_baseline_classifier:
            for param in self.baseline.classifier.parameters():
                param.requires_grad = True

        # Fully-connected block
        self.fc1 = nn.Linear(1000, 128)
        self.dropout1 = nn.Dropout(self.dropout_rate)
        self.fc2 = nn.Linear(128, 32)
        self.dropout2 = nn.Dropout(self.dropout_rate)
        self.fc3 = nn.Linear(32, self.num_output_classes)

    def forward(self, x):
        """Run the backbone and the head on ``x``.

        Returns a tensor whose last dimension has ``num_output_classes`` entries,
        each passed through leaky ReLU and then a sigmoid.
        """
        # Follow the module's *current* device rather than the one cached in
        # __init__, so the network still works after `.to(...)` / `.cpu()`.
        x = x.to(self.fc1.weight.device)

        # Baseline
        x = self.baseline(x)

        # FC Block
        x = F.leaky_relu(self.fc1(x))
        x = self.dropout1(x)
        x = F.leaky_relu(self.fc2(x))
        x = self.dropout2(x)
        # NOTE(review): leaky ReLU followed by sigmoid on the last layer is unusual,
        # but it is left untouched because changing it would change what a
        # checkpoint trained with this architecture computes.
        x = F.leaky_relu(self.fc3(x))
        x = torch.sigmoid(x)
        return x
    
# Keyword arguments for constructing the surprise-estimation CreativeNet.
surprise_model_args = dict(
    train_baseline_classifier=False,
    num_output_classes=2,
    dropout_rate=0.35,
)

def get_surprise_score(input_image_path:str, model_path:str) -> Optional[float]:
    """
        @method get_surprise_score
            Get the surprise coefficient from a MUMU-trained model
        @param input_image_path: Path to the input SDXL image
        @param model_path: Path to the `.pt` file containing the surprise estimation model
        @return The softmax score at index 1 of the model output (order of scores: ai, human)
            as a float, or None when anything goes wrong (the error is printed).
        @note The weights are loaded into the module-level `surprise_model` with
            `model.load_state_dict(torch.load(PATH))`; that model is also moved to
            `TORCH_DEVICE` and switched to eval mode as a side effect.
    """
    try:
        # Transform the input image into a torch tensor @ref: https://www.projectpro.io/recipes/convert-image-tensor-pytorch
        transform = transforms.Compose([
                transforms.Resize((256, 256)),
                transforms.ToTensor(),
        ])

        # convert() returns a fully loaded copy, so the file handle can be closed right away.
        with Image.open(input_image_path) as raw_image:
            img = raw_image.convert("RGB")
        x = transform(img).unsqueeze(0).to(TORCH_DEVICE)

        # Load model. map_location lets a checkpoint saved on a GPU load on a CPU-only session.
        surprise_model.load_state_dict(torch.load(model_path, map_location = TORCH_DEVICE))
        surprise_model.to(TORCH_DEVICE)
        surprise_model.eval()

        # Compute outputs
        with torch.no_grad():
            outputs = surprise_model(x)
            y = torch.softmax(outputs, dim = 1).cpu()
        return float(y[0][1].item()) # Order of scores: ai, human
    except Exception as e:
        # Deliberate best-effort: report the problem and let the caller handle None.
        print(f"Error while estimating surprise score of {input_image_path}: {e}")
        return None

In [13]:
# Build the network, load the trained weights and switch to inference mode.
surprise_model = CreativeNet(**surprise_model_args).to(TORCH_DEVICE)
# map_location lets a checkpoint saved on a GPU load on a CPU-only session.
surprise_model.load_state_dict(torch.load(MODEL_PATH, map_location=TORCH_DEVICE))
# eval() returns the module itself, so the cell still displays the architecture.
surprise_model.eval()

CreativeNet(
  (baseline): MobileNetV3(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        (2): Hardswish()
      )
      (1): InvertedResidual(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=16, bias=False)
            (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
            (2): ReLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(16, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 16, kernel_size=(1, 1), stride=(1, 1))
            (activation): ReLU()
            (scale_activation): Hardsigmoid()
          )
          (2): Con

## Install GRAD-CAM package

In [14]:
!pip install -q grad-cam

## Generating outputs

In [15]:
from pytorch_grad_cam import GradCAM, HiResCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM, FullGrad
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from pytorch_grad_cam.utils.image import show_cam_on_image
from torchvision.models import resnet50
from PIL import Image

# Tunables for this cell.
CAM_INPUT_SIZE = (256, 256)   # same resize as get_surprise_score uses
NUM_VISUALISATIONS = 10       # at most this many AI test images are processed
HUMAN_CLASS_INDEX = 1         # order of CreativeNet scores: ai, human

# Everything below up to the loop is independent of the image, so it is done once.
# Transform the input image into a torch tensor @ref: https://www.projectpro.io/recipes/convert-image-tensor-pytorch
cam_transform = transforms.Compose([
    transforms.Resize(CAM_INPUT_SIZE),
    transforms.ToTensor(),
])

surprise_model.load_state_dict(torch.load(MODEL_PATH, map_location=TORCH_DEVICE))
surprise_model.to(TORCH_DEVICE)
surprise_model.eval()

# Explain the full surprise model, not only its MobileNet backbone: the backbone
# alone outputs 1000 ImageNet logits, so output index 1 there would be an
# ImageNet class rather than the "human" score of the two-class head.
# The activations are still taken from the backbone's second-to-last feature block.
target_layers = [surprise_model.baseline.features[-2]]

# We have to specify the target we want to generate the Class Activation Maps for.
# If targets is None, the highest scoring category is used for every image in the batch.
targets = [ClassifierOutputTarget(HUMAN_CLASS_INDEX)]

# Construct the CAM object once and re-use it on every image; the with statement
# makes sure its hooks are released afterwards.
with AblationCAM(model=surprise_model, target_layers=target_layers) as cam:
    # Slicing (instead of range(10)) avoids an IndexError when fewer images exist.
    for i, image_path in enumerate(all_test_images_ai[:NUM_VISUALISATIONS]):
        with Image.open(image_path) as raw_image:
            img = raw_image.convert("RGB")

        # Note: input_tensor can be a batch tensor with several images!
        input_tensor = cam_transform(img).unsqueeze(0).to(TORCH_DEVICE)

        # You can also pass aug_smooth=True and eigen_smooth=True, to apply smoothing.
        # There is only one image in the batch, so take the first map.
        grayscale_cam = cam(input_tensor=input_tensor, targets=targets)[0, :]

        # Overlay on exactly what the model saw: the resized image as float32
        # in [0, 1], height x width x channels. Using the un-resized image here
        # only works when the file already has CAM_INPUT_SIZE.
        background = input_tensor[0].permute(1, 2, 0).cpu().numpy()
        visualization = show_cam_on_image(background, grayscale_cam, use_rgb=True)

        Image.fromarray(visualization).save(f"sdxl_vis_human_label_{i}.png")

100%|██████████| 3/3 [00:00<00:00, 86.18it/s]
100%|██████████| 3/3 [00:00<00:00, 83.60it/s]
100%|██████████| 3/3 [00:00<00:00, 85.43it/s]
100%|██████████| 3/3 [00:00<00:00, 83.83it/s]
100%|██████████| 3/3 [00:00<00:00, 86.89it/s]
100%|██████████| 3/3 [00:00<00:00, 86.09it/s]
100%|██████████| 3/3 [00:00<00:00, 83.20it/s]
100%|██████████| 3/3 [00:00<00:00, 85.30it/s]
100%|██████████| 3/3 [00:00<00:00, 83.72it/s]
100%|██████████| 3/3 [00:00<00:00, 86.56it/s]


In [16]:
# List the working directory to check which visualisation files were written.
!ls

sdxl_vis_human_label_0.png  sdxl_vis_human_label_5.png
sdxl_vis_human_label_1.png  sdxl_vis_human_label_6.png
sdxl_vis_human_label_2.png  sdxl_vis_human_label_7.png
sdxl_vis_human_label_3.png  sdxl_vis_human_label_8.png
sdxl_vis_human_label_4.png  sdxl_vis_human_label_9.png
