In [1]:
%matplotlib inline

import glob
import os

import numpy as np
import skimage.transform
import torch
import torch.nn as nn
import torch.nn.functional as F

from PIL import Image
from matplotlib.pyplot import imshow
from torchvision import models, transforms
from torchvision.utils import save_image

from cam import CAM
from visualize import visualize, reverse_normalize
from model.resnet import resnet50

In [2]:
# Collect the sample frame paths and put them in lexicographic filename order.
images = glob.glob('./sample/*.jpg')
images.sort()

In [3]:
# Load every frame as a normalized float tensor and assemble them into one clip.
clip = []
for path in images:
    # The context manager releases the file handle as soon as the pixels have
    # been copied into the tensor; convert('RGB') guarantees three channels so
    # the per-channel mean/std below always line up (grayscale/RGBA inputs).
    with Image.open(path) as frame:
        img = transforms.functional.to_tensor(frame.convert('RGB'))
    # normalize using mean and std from ImageNet
    img = transforms.functional.normalize(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    clip.append(img)

# (T, C, H, W) -> (1, T, C, H, W) -> (1, C, T, H, W)
clip = torch.stack(clip)
clip = clip.unsqueeze(0).transpose(1, 2)

In [4]:
# Inspect the tensor layout produced by the stack / unsqueeze / transpose above.
clip.shape

torch.Size([1, 3, 16, 224, 224])

In [5]:
# Instantiate ResNet-50 from the local `model.resnet` module; the checkpoint
# weights are loaded in a later cell.
model = resnet50()

  m.weight = nn.init.kaiming_normal(m.weight, mode='fan_out')


In [6]:
# Wrap in DataParallel so the parameter names gain the 'module.' prefix.
# Presumably the checkpoint was saved from a DataParallel model and its keys
# carry that prefix -- TODO confirm against the .pth file.
model = nn.DataParallel(model)

In [7]:
# Restore the weights stored under the checkpoint's 'state_dict' key, mapping
# every tensor onto the CPU.
# NOTE: torch.load unpickles the file -- only load checkpoints from a trusted source.
model.load_state_dict(
    torch.load('./resnet-50-kinetics.pth', map_location='cpu')['state_dict']
)

In [8]:
# Unwrap the underlying network from the DataParallel container.
# NOTE(review): not idempotent -- re-running this cell on the already-unwrapped
# model only works if that object itself has a `.module` attribute.
model = model.module

In [9]:
# Switch the network to evaluation mode; the returned module's repr is the cell output.
model.eval()

ResNet(
  (conv1): Conv3d(3, 64, kernel_size=(7, 7, 7), stride=(1, 2, 2), padding=(3, 3, 3), bias=False)
  (bn1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool3d(kernel_size=(3, 3, 3), stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv3d(64, 64, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
      (bn1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv3d(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
      (bn2): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv3d(64, 256, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
      (bn3): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv3d(64, 256, ker

In [10]:
# Layer handed to CAM below: `conv3` of the last block in `layer4`.
target_layer = model.layer4[-1].conv3

In [13]:
# Wrap the network with the project's CAM helper, targeting the layer chosen above.
# NOTE(review): this rebinds `model`, so re-running the cell would pass the CAM
# object itself back into CAM -- restart the kernel before re-running.
model = CAM(model, target_layer)

In [14]:
# Run the CAM-wrapped model on the clip. Per the recorded output, the call also
# prints the predicted action id and its probability.
cam = model(clip)

predicted action ids 391	 probability 0.07258863747119904


In [15]:
# Presumably undoes the ImageNet normalization applied when the frames were
# loaded -- confirm in visualize.py.
# NOTE(review): `clip` is overwritten, so re-running this cell applies the
# reversal a second time.
clip = reverse_normalize(clip)

In [16]:
# Pass the clip and the CAM output to the project's `visualize` helper;
# presumably it overlays the activation map on each frame -- confirm in visualize.py.
heatmaps = visualize(clip, cam)

In [17]:
# Inspect the shape of the visualization result.
heatmaps.shape

torch.Size([1, 3, 16, 224, 224])

In [18]:
# Save one PNG per frame of the heatmap clip.
# `os` and `save_image` are imported in the notebook's top import cell.
out_dir = './cam'
if not os.path.isdir(out_dir):
    # save_image writes straight to the given path and will not create
    # missing parent directories, so make sure the folder exists first.
    os.makedirs(out_dir)

# Frames live on axis 2 of `heatmaps` (see the shape printed above); read the
# count from the tensor instead of hard-coding 16.
for i in range(heatmaps.shape[2]):
    heatmap = heatmaps[:, :, i].squeeze()
    # Zero-padded three-digit index: cam000.png, cam001.png, ...
    save_image(heatmap, '{}/cam{:03d}.png'.format(out_dir, i))