<a href="https://colab.research.google.com/github/venkatasl/AIML_TRAINING_VENKAT/blob/venkat_creation/DRDO2024_FeatureInversion.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Feature inversion

What does a model see when it looks at an image? This is what we find out using feature inversion.

We take the features of the model at a particular layer and try to reconstruct the input image from them.

In [None]:
# import everything
import torch
import torchvision.transforms as transforms
import torchvision.transforms.v2 as v2
from torchvision import models
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from PIL import Image
import requests
from io import BytesIO
import matplotlib.pyplot as plt
import math

# Device switch read by the cells below: when True, the model and the image
# tensor are moved to CUDA with .cuda(); leave it False to run on the CPU.
gpu = False # turn to True if using a gpu kernel

In [None]:
# Load a pre-trained model. The `weights=` enum replaces the deprecated
# `pretrained=True` flag and selects the same ImageNet checkpoint.
model = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)

# Switch to inference mode: this disables the Dropout layers in the classifier,
# so features taken from the fully connected layers are deterministic.
model.eval()

# put the model on the gpu
if gpu:
  model.cuda()


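VGG16 has 39 layers in total when we count the 31 modules in `features`, the average-pooling layer, and the 7 modules in `classifier`. Let us write a function to propagate an input forward to the desired layer and get the features from there.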



In [None]:
def get_features(model, input, layernum):
    """Run `input` through `model` and return the output of stage `layernum`.

    Stages are numbered from 0 in execution order: every child of
    `model.features`, then `model.avgpool`, then every child of
    `model.classifier`. The flatten step between the pooling stage and the
    classifier is applied but does not get a number of its own.

    Args:
        model: object exposing `features`, `avgpool` and `classifier`.
        input: tensor fed to the first stage.
        layernum: zero-based index of the stage whose output is wanted.

    Returns:
        The activation tensor produced by stage `layernum`. If no stage has
        that index (e.g. `layernum` is too large), the total number of
        numbered stages is returned as an int instead.
    """
    activation = input
    position = -1

    # Convolutional trunk.
    for position, stage in enumerate(model.features.children()):
        activation = stage(activation)
        if position == layernum:
            return activation

    # Pooling stage takes the next index after the trunk.
    position += 1
    activation = model.avgpool(activation)
    if position == layernum:
        return activation

    # Collapse everything but the batch dimension before the classifier.
    activation = torch.flatten(activation, 1)

    for position, stage in enumerate(model.classifier.children(), start=position + 1):
        activation = stage(activation)
        if position == layernum:
            return activation

    # Nothing matched: report how many numbered stages exist.
    return position + 1


Let us define some helper functions:

In [None]:
# Per-channel mean and standard deviation of the ImageNet dataset. They are
# shared by both transforms below so the forward and inverse pipelines cannot
# drift apart.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Define the image transformation. This is important because this is how the model was trained
preprocess = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# inverse of the preprocess transform.
# Normalize computes y = (x - mean) / std, so x = (y - (-mean/std)) / (1/std):
# normalising again with mean' = -mean/std and std' = 1/std undoes it.
inversetransform = transforms.Compose([
    transforms.Normalize(
        mean=[-m / s for m, s in zip(IMAGENET_MEAN, IMAGENET_STD)],
        std=[1 / s for s in IMAGENET_STD]),
    transforms.ToPILImage()])

# Function to load and preprocess the image
def load_image(image_path, display=False):
    """Load an image from a URL or local path and preprocess it for the model.

    Args:
        image_path: local file path, or a URL starting with 'http'.
        display: when True, show the preprocessed image (mapped back through
            `inversetransform`) with matplotlib.

    Returns:
        The preprocessed image tensor with a leading batch dimension added.

    Raises:
        requests.HTTPError: if the download returns an error status.
        requests.Timeout: if the server does not answer in time.
    """
    if image_path.startswith('http'):
        # Fail fast on dead links instead of hanging or handing an HTML
        # error page to PIL.
        response = requests.get(image_path, timeout=30)
        response.raise_for_status()
        img = Image.open(BytesIO(response.content))
    else:
        img = Image.open(image_path)

    # Grayscale, palette and RGBA files would otherwise break the 3-channel
    # normalisation in `preprocess`.
    img = preprocess(img.convert('RGB'))
    if display:
        plt.imshow(inversetransform(img))

    return img.unsqueeze(0)  # Add batch dimension

Let us define some loss functions and regularization functions:

In [None]:
def tv(t,beta=2):
    """Total-variation penalty measuring how smooth or edgy an image is.

    Args:
        t: 4-D tensor; differences are taken along the last two dimensions.
        beta: exponent applied to the gradient magnitude (the squared
            magnitude is raised to `beta / 2`).

    Returns:
        Scalar tensor: the sum over all positions of
        (dx**2 + dy**2) ** (beta / 2).
    """
    # Neighbour differences along the last axis and the second-to-last axis.
    # Each map is then cropped by one row/column so both have the same shape.
    dx = (t[:, :, :, 1:] - t[:, :, :, :-1])[:, :, :-1, :]
    dy = (t[:, :, 1:, :] - t[:, :, :-1, :])[:, :, :, :-1]

    squared_magnitude = dx ** 2 + dy ** 2
    return squared_magnitude.pow(beta / 2.).sum()

def tv2(t):
    """Total-variation penalty for the special case beta = 2.

    Args:
        t: 4-D tensor; differences are taken along the last two dimensions.

    Returns:
        Scalar tensor: the sum of squared neighbour differences along the
        last axis plus the sum along the second-to-last axis, each taken
        over the common cropped region.
    """
    col_diff = t[:, :, :, 1:] - t[:, :, :, :-1]
    row_diff = t[:, :, 1:, :] - t[:, :, :-1, :]

    # Crop one row / one column so both maps cover the same positions,
    # then accumulate each direction's energy separately.
    energy_x = col_diff[:, :, :-1, :].square().sum()
    energy_y = row_diff[:, :, :, :-1].square().sum()
    return energy_x + energy_y

def alpha_norm(t,alpha=6):
    """Return the alpha-norm penalty sum(|t_i| ** alpha).

    The absolute value makes the penalty non-negative for every `alpha`.
    Without it, an odd `alpha` lets negative entries cancel positive ones and
    a fractional `alpha` yields NaN on negative entries. For even integer
    `alpha` (including the default) the result is the same as without it.

    Args:
        t: tensor to penalise.
        alpha: exponent applied to each entry's magnitude.

    Returns:
        Scalar tensor holding the summed penalty.
    """
    return t.abs().pow(alpha).sum()

def mse(ref, x):
    """Return the mean squared error between `ref` and `x` as a scalar tensor."""
    return torch.nn.functional.mse_loss(input=ref, target=x, reduction="mean")

def total(ref_feat, x_feat, x, alpha_lambda, tv_lambda ):
    """Combined objective: feature mismatch plus two weighted image penalties.

    Args:
        ref_feat: reference features to match.
        x_feat: features computed from the candidate image `x`.
        x: candidate image tensor, passed to `alpha_norm` and `tv2`.
        alpha_lambda: weight of the `alpha_norm` penalty.
        tv_lambda: weight of the `tv2` smoothness penalty.

    Returns:
        mse(ref_feat, x_feat) + alpha_lambda * alpha_norm(x) + tv_lambda * tv2(x)
    """
    feature_term = mse(ref_feat, x_feat)
    magnitude_term = alpha_lambda * alpha_norm(x)
    smoothness_term = tv_lambda * tv2(x)
    return feature_term + magnitude_term + smoothness_term



Inversion function:

In [None]:
def invert_image(model, input, layernum, alpha_lambda, tv_lambda,lr, iterations=1000):
    """Reconstruct an image whose features at `layernum` match those of `input`.

    A random image is optimised with Adam so that `get_features` of it at
    `layernum` approaches the features of `input`, under the regularised
    objective computed by `total`.

    Args:
        model: network passed through to `get_features`.
        input: reference image tensor with a batch dimension of one.
        layernum: index of the layer whose features are matched.
        alpha_lambda: weight of the alpha-norm penalty in `total`.
        tv_lambda: weight of the total-variation penalty in `total`.
        lr: Adam learning rate.
        iterations: number of optimisation steps.

    Returns:
        The optimised image, passed through `inversetransform`.
    """
    # This is the random image we will optimize. It copies the reference
    # image's shape, dtype and device, so no global device flag is needed.
    x = torch.rand_like(input).requires_grad_(True)
    # Honour the caller's learning rate (it used to be hard-coded to 0.5).
    optimizer = Adam([x], lr=lr, weight_decay=1e-6)

    # get the reference features
    ref_feats = get_features(model, input, layernum).detach() # don't want to keep extra gradients

    for i in range(iterations):
        x_feats = get_features(model, x, layernum)
        loss = total(ref_feats, x_feats, x, alpha_lambda, tv_lambda)
        optimizer.zero_grad()
        # Only the image needs gradients; restricting backward to it avoids
        # accumulating unused gradients in the model's weights on every step.
        loss.backward(inputs=[x])
        optimizer.step()
        print(f'\r {i + 1} of {iterations} iterations complete', end = '     ')

    # Detach from the autograd graph and drop the batch dimension.
    return inversetransform(x.detach().squeeze(0))

# Try it with an image!

In [None]:
# Let us load an image: fetch it, preprocess it and preview it (display=True).
# Alternative sample image; swap it in for the URL below to try a different input:
# image_path = 'https://raw.githubusercontent.com/pytorch/serve/refs/heads/master/examples/image_classifier/kitten.jpg'  # Replace with your image path or URL
image_path = 'https://www.pixelstalk.net/wp-content/uploads/2016/03/Animals-baby-cat-dog-HD-wallpaper.jpg'
img = load_image(image_path, display=True)

In [None]:
# Invert the image from the features of the first layer (index 0), with both
# regularisers switched off.
if gpu:
    img = img.cuda()
invimg = invert_image(
    model,
    img,
    layernum=0,
    alpha_lambda=0,
    tv_lambda=0,
    lr=0.1,
    iterations=1000,
)
plt.imshow(invimg)

Let us try this for all the convolution layers. The alpha_lambda and tv_lambda values below are ones I tuned earlier.

In [None]:
# Indices of the convolution layers, with the regularisation weights to use
# for each one (the three lists are aligned position by position).
convlayers = [0,2,5,7,10,12,14,17,19,21,24,26,28]
tvparams = [0,0,1e-6,1e-4,1e-4,1e-2,1e-2,1e-2,1e-2,1e-3,1e-3,1e-4,1e-5]
alphaparams = [0,0,0,1e-6,1e-6,1e-4,1e-4,1e-5,1e-5,1e-4,1e-4,1e-5,0]
if gpu:
  # Call .cuda(); without the parentheses `img` is rebound to the method itself.
  img = img.cuda()
for layer, alpha_lambda, tv_lambda in zip(convlayers, alphaparams, tvparams):
  print('Layer ', layer)
  plt.imshow(invert_image(model, img, layer, alpha_lambda, tv_lambda, 0.1))
  plt.show()

# Exercises
1. Try with different images
2. Try changing the alpha_lambda and tv_lambda values to get better results for the higher layers