# Feature Visualization

(c) 2021 Fabian Offert

## Colab Setup

Run the commands below only if you imported this notebook into Google Colab! Also **go to Runtime/Change runtime type and pick "GPU" as the hardware accelerator!**

In [6]:
# Fetch the helper repository and place the class-label file next to the notebook;
# synset_words.txt is opened by relative path further down.
!rm -rf minimal # In case this is re-run
!git clone https://github.com/zentralwerkstatt/minimal
# Copy the label file out of the clone into the current working directory
!cp ./minimal/synset_words.txt ./

Cloning into 'minimal'...
remote: Enumerating objects: 10, done.[K
remote: Counting objects: 100% (10/10), done.[K
remote: Compressing objects: 100% (8/8), done.[K
remote: Total 10 (delta 0), reused 4 (delta 0), pack-reused 0[K
Unpacking objects: 100% (10/10), done.


In [7]:
# Print the driver/CUDA versions and the attached GPU with its current memory usage
!nvidia-smi # Check what kind of GPU we got

Thu May  6 12:40:19 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P0    33W / 250W |   2099MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Imports

We are using PyTorch, a de facto standard for high-level prototyping in machine learning. Because we are operating in high-dimensional vector spaces, we are also using NumPy, the Python library for scientific computing. Finally, we are importing several ready-to-use image filters and some helper libraries.

In [8]:
import torch as t
import torch.nn as nn
import torch.nn.functional as F
import torchvision as tv

import numpy as np

from scipy.ndimage.filters import gaussian_filter, median_filter
from skimage.restoration import denoise_bilateral, denoise_tv_chambolle
import PIL.Image, PIL.ImageChops

import os
import random
from io import BytesIO
from IPython import display

## Model to investigate

This may take a while, as the pre-trained weights have to be downloaded on the first run.

In [9]:
# Prefer the first CUDA device; fall back to the CPU when no GPU is visible
device = t.device("cuda:0" if t.cuda.is_available() else "cpu")

# The three pre-trained classifiers under investigation, moved to `device`.
# `.eval()` returns the module itself, so it can be chained: it puts every network into
# test mode, since we never want to train (i.e. change the weights of) any of them.
f1 = tv.models.inception_v3(pretrained=True).to(device).eval()
f2 = tv.models.vgg16(pretrained=True).to(device).eval()
f3 = tv.models.vgg19(pretrained=True).to(device).eval()

# Human-readable names, keyed by the variable name of each model
model_names = {'f1':'Inception V3', 'f2':'VGG16', 'f3': 'VGG19'}

## Helper functions

Among other things, these helper functions allow us to convert between PyTorch tensors, NumPy arrays, and PIL images.

In [10]:
# Show an image within a Jupyter environment
# Can do PyTorch tensors, NumPy arrays, and PIL images
def show_img(img, title='', fmt='jpeg'):
    """Render an image inline in the notebook.

    Args:
        img: A PIL image, a NumPy array accepted by PIL.Image.fromarray, or a
            PyTorch tensor accepted by deprocess().
        title: Optional caption printed above the image; nothing is printed
            when it is empty.
        fmt: Image format name handed to the PIL save() call (default 'jpeg').
    """
    # isinstance instead of an exact type match: subclasses (e.g. nn.Parameter)
    # are converted too, rather than falling through and failing on .save()
    if isinstance(img, np.ndarray):
        img = PIL.Image.fromarray(img)
    elif isinstance(img, t.Tensor):
        img = deprocess(img)

    if title:
        print(title)

    # Encode into an in-memory buffer and hand the raw bytes to IPython
    out = BytesIO()
    img.save(out, fmt)
    display.display(display.Image(data=out.getvalue()))

# PyTorch is channels first, this happens here!
# Pipeline with a single step, ToTensor; used throughout the notebook to turn a PIL image
# into a tensor (callers add the batch dimension themselves with .unsqueeze(0)).
preprocess = tv.transforms.Compose([tv.transforms.ToTensor()])
    
# Reverse of preprocess, PyTorch tensor to PIL image
def deprocess(tensor):
    """Convert a channels-first image tensor into a PIL image.

    Args:
        tensor: Image tensor shaped (1, C, H, W) or (C, H, W) with values
            nominally in the 0./1. range. The tensor itself is not modified.

    Returns:
        A PIL image built from the channels-last, uint8 version of the data.

    Raises:
        ValueError: If a 4-D input has a batch dimension larger than 1.
    """
    # detach() + clone() gives an independent copy: on the CPU, .numpy() shares memory
    # with its tensor, so without the copy later changes could write through to the caller
    img = tensor.detach().clone().cpu().numpy()

    # Get rid of the batch dimension only; a blanket squeeze() would also drop a
    # height or width of 1 and break the transpose below
    if img.ndim == 4:
        img = img.squeeze(axis=0)
    img = img.transpose((1, 2, 0)) # Channels first to channels last

    # No mean/std de-normalization: preprocess() only applies ToTensor, so the data
    # was never normalized with the ImageNet statistics in the first place

    # Values outside 0./1. have no uint8 representation; casting them unclamped wraps
    # around or yields platform-dependent results, so clamp before scaling.
    # In-range images (the normal case) are unaffected.
    img = np.clip(img, 0, 1)

    # 0./1. range to 0./255. range
    img = img * 255

    return PIL.Image.fromarray(img.astype(np.uint8))

# Return a gray square PIL image
def gray_square(size):
    """Build a size x size, 3-channel image of mid-gray with faint Gaussian noise.

    Args:
        size: Edge length of the square in pixels.

    Returns:
        A PIL image created from a (size, size, 3) uint8 array.
    """
    # Zero-mean noise with a standard deviation of 0.01: "gray" in a -1./1. range
    noise = np.random.normal(0, 0.01, (size, size, 3))

    # -1./1. range to 0./255. range: halve, shift up by 0.5, scale by 255
    pixels = (noise / 2. + 0.5) * 255.

    return PIL.Image.fromarray(pixels.astype(np.uint8))

# Load ImageNet classes
# One entry per line of synset_words.txt, normalized in this order: spaces and commas
# become underscores, the text is lower-cased, and the trailing newline is stripped.
# Built with a comprehension so that no loop variables (`i`, `line`) are left behind
# in the notebook's global namespace.
with open('synset_words.txt') as synset_words_file:
    synset_words = [
        line.replace(' ', '_').replace(',', '_').lower().strip()
        for line in synset_words_file
    ]

# Classify an image with the target model
# Can do PyTorch tensors and PIL images
def predict(img, model):
    """Run `model` on one image and report its top prediction.

    Args:
        img: Either a PyTorch tensor that is passed to the model as is (so it must
            already carry a batch dimension), or anything preprocess() accepts,
            e.g. a PIL image, to which a batch dimension is added here.
        model: Callable returning class scores with the classes along dim 1.

    Returns:
        Tuple of (class no., class name from synset_words, prediction confidence),
        where the confidence is the softmax probability of the winning class.
    """
    # isinstance also accepts tensor subclasses such as nn.Parameter
    if isinstance(img, t.Tensor):
        batch = img
    else:
        batch = preprocess(img).unsqueeze(0)

    # Pure inference: no_grad() avoids building (and holding on to) an autograd graph
    with t.no_grad():
        preds = model(batch.to(device))
        preds_softmax_np = F.softmax(preds, dim=1).cpu().numpy()

    # argmax() indexes the flattened array, which equals the class no. for a batch of one
    best = preds_softmax_np.argmax()
    return best, synset_words[best], preds_softmax_np.max()

# "Rolling" list: whenever an item is added, the first item is discarded
def destructive_append(l,i):
    """Return a new list holding `l` without its first item, followed by `i`.

    The slice makes a copy, so the list passed in is left unchanged; callers are
    expected to use the returned list.
    """
    return l[1:] + [i]

# PyTorch and skimage use different channel ordering
def pytorch_to_skimage(img):
    """Drop the batch dimension and move the channels to the last axis.

    Takes the first element along axis 0 and swaps axes 0 and 2 of what remains,
    so an input indexed [0, c, a, b] is returned indexed [b, a, c]. Note that the
    two spatial axes trade places as well; skimage_to_pytorch() applies the same
    swap and thereby restores them.
    """
    return np.swapaxes(img[0], 0, 2)
    
def skimage_to_pytorch(img):
    """Turn a channels-last array back into a batched, channels-first float32 array.

    Swaps axes 0 and 2 (channels first), converts to float32 (skimage works in
    double precision), and prepends a batch dimension of size 1.
    """
    channels_first = np.swapaxes(img, 0, 2).astype(np.float32)
    # Add the batch dimension back in front
    return channels_first[np.newaxis, ...]

# Filters for feature visualization
def filter_median(npimg, params):
    """Median-filter the last two axes of a 4-D array.

    Args:
        npimg: 4-D array; the window has extent 1 along the first two axes, so
            values are never mixed across them.
        params: Dict whose 'fsize' entry is the window edge length along each of
            the last two axes.

    Returns:
        The filtered array.
    """
    window = params['fsize']
    return median_filter(npimg, size=(1, 1, window, window))

def filter_bilateral(npimg, params):
    """Apply skimage's bilateral denoising to a batched, channels-first image array.

    Args:
        npimg: Array with a leading batch axis followed by the channel axis; only
            the first batch element is filtered (pytorch_to_skimage takes img[0]).
        params: Optional dict. 'sigma_color' (default 0.05) and 'sigma_spatial'
            (default 15) override the filter strengths; an empty dict or None
            reproduces the previous hard-coded settings.

    Returns:
        The denoised image as a float32 array with a batch dimension of 1.
    """
    import inspect  # Local import: only used to probe the installed skimage API

    params = params or {}

    # skimage 0.19 introduced `channel_axis` as the replacement for `multichannel`,
    # and newer releases no longer accept the old keyword. Use whichever one the
    # installed version understands so the notebook runs on both.
    if 'channel_axis' in inspect.signature(denoise_bilateral).parameters:
        channel_kwargs = {'channel_axis': -1}
    else:
        channel_kwargs = {'multichannel': True}

    npimg = pytorch_to_skimage(npimg)
    npimg = denoise_bilateral(
        npimg,
        sigma_color=params.get('sigma_color', 0.05),
        sigma_spatial=params.get('sigma_spatial', 15),
        **channel_kwargs
    )
    return skimage_to_pytorch(npimg)

def filter_TV(npimg, params):
    """Apply skimage's total-variation (Chambolle) denoising to a batched, channels-first image array.

    Args:
        npimg: Array with a leading batch axis followed by the channel axis; only
            the first batch element is filtered (pytorch_to_skimage takes img[0]).
        params: Optional dict. 'weight' (default 0.1) overrides the denoising
            weight; an empty dict or None reproduces the previous hard-coded setting.

    Returns:
        The denoised image as a float32 array with a batch dimension of 1.
    """
    import inspect  # Local import: only used to probe the installed skimage API

    params = params or {}

    # skimage 0.19 introduced `channel_axis` as the replacement for `multichannel`,
    # and newer releases no longer accept the old keyword. Use whichever one the
    # installed version understands so the notebook runs on both.
    if 'channel_axis' in inspect.signature(denoise_tv_chambolle).parameters:
        channel_kwargs = {'channel_axis': -1}
    else:
        channel_kwargs = {'multichannel': True}

    npimg = pytorch_to_skimage(npimg)
    npimg = denoise_tv_chambolle(
        npimg,
        weight=params.get('weight', 0.1),
        **channel_kwargs
    )
    return skimage_to_pytorch(npimg)

## Gradient Ascent

In [11]:
def gradient_ascent(img, neuron, model, iterations=2000, filters=None, jitter=32,
                    lr=0.4, l2=1e-4, log_every=50):
    """Optimize an image by gradient ascent so that one output unit of `model` responds strongly.

    Each iteration: randomly shift (jitter) the image, take one SGD step that raises
    the chosen output, undo the shift, resample out-of-range pixels, and periodically
    run a smoothing filter over the image.

    Args:
        img: Start image, anything preprocess() accepts (e.g. a PIL image).
        neuron: Index of the output unit (class no.) to maximize.
        model: Network to visualize; called on a batch of one image.
        iterations: Number of optimization steps.
        filters: List of dicts with keys 'function' (called as function(npimg, params)
            on a NumPy copy of the image), 'frequency' (run every n-th iteration) and
            'params'. Defaults to total-variation denoising every 20 iterations.
        jitter: Maximum random shift in pixels along each spatial axis; 0 disables it.
        lr: SGD learning rate.
        l2: SGD weight decay on the image (Yosinski weight decay).
        log_every: Print progress and show the image every n-th iteration; 0 is silent.

    Returns:
        The optimized image as a tensor with a batch dimension, on `device`, with
        requires_grad set.
    """
    if filters is None:
        # Alternative with good results: median filter, fsize 5, every 4 iterations:
        # [{'function': filter_median, 'frequency': 4, 'params': {'fsize': 5}}]
        filters = [{'function':filter_TV, 'frequency':20, 'params':{}}] # Good parameters

    # The image itself is the only thing being optimized
    canvas = preprocess(img).unsqueeze(0).to(device).requires_grad_()
    optimizer = t.optim.SGD([canvas], lr=lr, weight_decay=l2)

    for i in range(iterations):
        verbose = bool(log_every) and i % log_every == 0

        optimizer.zero_grad()

        # Centers the object in the image: roll by a random offset before the step.
        # Done with t.roll on the device instead of a CPU/NumPy round trip.
        if jitter:
            ox, oy = (int(shift) for shift in np.random.randint(-jitter, jitter+1, 2))
            with t.no_grad():
                canvas.copy_(t.roll(canvas, shifts=(ox, oy), dims=(-1, -2)))

        x = model(canvas)
        # Minimizing the negated output maximizes the output itself
        # NOTE(review): unless the caller froze the model's parameters, backward() also
        # accumulates gradients into them; they are never read or applied here.
        loss = -x[:,neuron]

        # Softmax and the host copy are only needed for the progress line
        if verbose:
            with t.no_grad():
                preds_softmax_np = F.softmax(x, dim=1).cpu().numpy()
            confidence = preds_softmax_np[:,neuron]
            print(f'Iterations: {i}, loss: {loss.item()}, pred.: {synset_words[preds_softmax_np.argmax()]}, conf.: {confidence}')

        loss.backward()
        optimizer.step()

        # Undo the jitter so the image stays aligned across iterations
        if jitter:
            with t.no_grad():
                canvas.copy_(t.roll(canvas, shifts=(-ox, -oy), dims=(-1, -2)))

        # Stochastic clipping: every pixel that left the 0./1. range gets its OWN fresh
        # uniform sample (a single shared random value would paint all of them the same)
        with t.no_grad():
            out_of_range = (canvas > 1) | (canvas < 0)
            canvas.copy_(t.where(out_of_range, t.rand_like(canvas), canvas))

        # Filtering
        if i != iterations - 1: # No regularization on last iteration for good quality output
            for filter_ in filters:
                if i % filter_['frequency'] == 0:
                    npimg = canvas.detach().cpu().numpy() # To CPU and numpy
                    npimg = filter_['function'](npimg, filter_['params'])
                    with t.no_grad():
                        # copy_ moves the result back to the device and dtype of the image
                        canvas.copy_(t.from_numpy(npimg))

        # Verbose
        if verbose:
            show_img(canvas)

    return canvas

In [None]:
# Start from a 299x299 gray-noise image and maximize output unit 1 of f1 (Inception V3)
noise = gray_square(299)
# The returned tensor is the optimized image; progress is printed and shown along the way
img = gradient_ascent(noise, 1, f1)