## Example - RobustnessTest

This notebook demonstrates the functionality of `RobustnessTest` (and, briefly, `ContinuityTest`).

In [4]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# Later cells read the image folder from a path under this mount point.
# force_remount=True asks Colab to re-mount rather than reuse an existing mount,
# so this cell can be re-run safely.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [300]:
!pip install captum
!pip install opencv-python

import torch
import torchvision
from torchvision import transforms
import numpy as np
import h5py
from tqdm import tqdm
from captum.attr import Saliency, IntegratedGradients
from pathlib import Path
import warnings

# Retrieve source code.
from drive.MyDrive.Projects.xai_quantification_toolbox import * #import xaiquantificationtoolbox

# Notebook settings.
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Hide warnings of category UserWarning for the rest of the session.
warnings.filterwarnings("ignore", category=UserWarning)
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before executing code so edits to the source are picked up without
# restarting the kernel. Re-running this cell prints an "already loaded" notice.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Load model, data and attributions.

In [16]:
# Load pre-trained ResNet18 model, move it to the target device once and
# switch it to inference mode.
model = torchvision.models.resnet18(pretrained=True)
model = model.to(device)
model.eval()

# Load test data and loaders.
# Pre-processing pipeline: resize, center-crop to 224x224, convert to a tensor
# and normalise each channel with the mean/std values given below.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
test_set = torchvision.datasets.ImageFolder(root='/content/drive/My Drive/imagenet_images',
                                            transform=preprocess)
# shuffle=True means the batch drawn below differs between runs.
test_loader = torch.utils.data.DataLoader(test_set, shuffle=True, batch_size=64)

# Evaluate model performance.
predictions, labels = evaluate_model(model, data=test_loader, device=device)
print(f"\nModel test accuracy: {(100 * score_model(predictions, labels)):.2f}%")

# Load data, targets and attributions.
# Use the built-in next(): the iterator's `.next()` method is a Python 2 relic
# that current PyTorch DataLoader iterators no longer provide.
x_batch, y_batch = next(iter(test_loader))
a_batch_saliency = explain(model, x_batch.to(device), y_batch.to(device), "Gradient")


Model test accuracy: 68.83%


In [25]:
# Visualise the first ten samples of the batch: the de-normalised input image
# followed by its saliency attribution map.
import matplotlib.pyplot as plt

images_cpu = x_batch.cpu().data
saliency_cpu = a_batch_saliency.cpu().data

for sample_idx in range(10):
    # Reorder the axes with two successive transposes before handing the
    # de-normalised image to imshow.
    image = denorm(images_cpu[sample_idx])
    image = image.transpose(0, 1)
    image = image.transpose(1, 2)
    plt.imshow(image)
    plt.show()

    # Attribution map with a diverging colour map and a colour bar.
    plt.imshow(saliency_cpu[sample_idx], cmap="seismic")
    plt.colorbar()
    plt.show()


Output hidden; open in https://colab.research.google.com to view.

### Option 1. Evaluate the robustness of attributions in one line of code.

In [None]:
# Measure the robustness of the provided attributions in a single expression:
# configure the test, then immediately call it on the batch (as NumPy arrays).
# NOTE(review): the attributions were produced with explain(..., "Gradient")
# while xai_method here is "Saliency" - confirm these refer to the same method.
scores = RobustnessTest(
    similarity_function=lipschitz_constant,
    perturbation_function=gaussian_noise,
)(
    model=model,
    x_batch=x_batch.cpu().numpy(),
    y_batch=y_batch.cpu().numpy(),
    a_batch=a_batch_saliency.cpu().numpy(),
    device=device,
    xai_method="Saliency",
)
scores

In [423]:
# Run the ContinuityTest on the provided attributions in a single expression:
# configure the test (similarity function, perturbation, number of patches and
# steps), then immediately call it on the batch (as NumPy arrays).
scores = ContinuityTest(
    similarity_function=correlation_spearman,
    perturbation_function=translation_x_direction,
    nr_patches=4,
    nr_steps=10,
)(
    model=model,
    x_batch=x_batch.cpu().numpy(),
    y_batch=y_batch.cpu().numpy(),
    a_batch=a_batch_saliency.cpu().numpy(),
    device=device,
    xai_method="Saliency",
)
scores

### Option 2. Evaluate the robustness of provided attributions using the additional functionality of `Quantifier` and `Plotting`.

In [None]:
# TODO: Provide notebooks for the different use cases: comparing models, XAI methods, and different measures.
# ...

In [None]:
# Specify the tests: one RobustnessTest per similarity function, each using
# gaussian_blur as the perturbation function.
tests = [RobustnessTest(**{
    "similarity_function": similarity_fn,
    "perturbation_function": gaussian_blur,
}) for similarity_fn in [lipschitz_constant, distance_euclidean, cosine]]

# Load attributions of another explanation method.
# Captum's keyword for the class indices is `target` (singular). Inputs and
# targets are moved to `device` so they live on the same device as the model.
a_batch_intgrad = IntegratedGradients(model).attribute(inputs=x_batch.to(device),
                                                       target=y_batch.to(device))

# Init the quantifier object.
# The HDF5 file is opened in append mode ("a") so it is created when missing and
# is writable; recent h5py versions default to read-only.
# NOTE(review): "PATH_TO_H5PY_FILE" and `checkpoints=None` are placeholders -
# replace them with a real path and whatever Quantifier expects for checkpoints.
quantifier = Quantifier(measures=tests, io_object=h5py.File("PATH_TO_H5PY_FILE", "a"), checkpoints=None)

# Score the tests once per attribution method.
results = [quantifier.score(model=model, x_batch=x_batch, y_batch=y_batch, a_batch=a_batch)
           for a_batch in [a_batch_saliency, a_batch_intgrad]]

# Plot Saliency vs Integrated Gradients.
Plotting(results, show=False, path_to_save="PATH_TO_SAVE_FIGURE")