In [1]:
%matplotlib inline

import glob
import h5py
import numpy as np
import io
import skimage.transform
import torch
import torch.nn as nn
import torch.nn. functional as F

from PIL import Image
from matplotlib.pyplot import imshow
from torchvision import models, transforms

from cam import GradCAM
from visualize import visualize, reverse_normalize
from model.slowfast import resnet152_NL

In [2]:
# Per-channel statistics handed to normalize() for every frame.
CHANNEL_MEAN = [0.485, 0.456, 0.406]
CHANNEL_STD = [0.229, 0.224, 0.225]


def load_frame(encoded):
    """Turn one stored, encoded frame into a normalised 224x224 tensor.

    The bytes are opened as a PIL image, centre-cropped to 224x224,
    converted to a tensor and normalised with CHANNEL_MEAN / CHANNEL_STD.
    """
    frame = Image.open(io.BytesIO(encoded))
    frame = transforms.functional.center_crop(frame, (224, 224))
    frame = transforms.functional.to_tensor(frame)
    return transforms.functional.normalize(
        frame, mean=CHANNEL_MEAN, std=CHANNEL_STD)


# Frames must be decoded while the HDF5 file is still open.
with h5py.File('1hnKlUAiyvE_000041_000051.hdf5', 'r') as h5_file:
    frames = h5_file['video']
    n_frames = len(frames)  # reused by the saving cell at the end of the notebook
    clip = [load_frame(frames[idx]) for idx in range(n_frames)]

# (T, C, H, W) -> (1, T, C, H, W) -> (1, C, T, H, W)
clip = torch.stack(clip).unsqueeze(0).transpose(1, 2)

In [3]:
# Display the shape of the assembled clip tensor as a quick sanity check.
clip.shape

torch.Size([1, 3, 240, 224, 224])

In [4]:
# Build the SlowFast network from model.slowfast.
# NOTE(review): class_num=700 presumably matches the Kinetics-700 checkpoint
# loaded in the next cell -- confirm against the weights file.
model = resnet152_NL(class_num=700)

In [5]:
# Read the checkpoint onto the CPU, independent of the device it was saved from.
# NOTE(review): torch.load unpickles the file -- only load weights from a trusted source.
state_dict = torch.load(
    './weights/slowfast152_nl_kinetics700.pth', map_location='cpu')
# Kept as the last expression so the key-matching report is shown as cell output.
model.load_state_dict(state_dict)

<All keys matched successfully>

In [6]:
# Switch the network to evaluation mode; eval() returns the module itself,
# so its layer-by-layer repr is displayed as the cell output.
model.eval()

SlowFast(
  (fast_conv1): Conv3d(3, 8, kernel_size=(5, 7, 7), stride=(1, 2, 2), padding=(2, 3, 3), bias=False)
  (fast_bn1): BatchNorm3d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fast_relu): ReLU(inplace=True)
  (fast_maxpool): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1), dilation=1, ceil_mode=False)
  (fast_res2): Sequential(
    (0): Bottleneck(
      (conv1): Conv3d(8, 8, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=(1, 0, 0), bias=False)
      (bn1): BatchNorm3d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv3d(8, 8, kernel_size=(1, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=False)
      (bn2): BatchNorm3d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv3d(8, 32, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
      (bn3): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
     

In [7]:
# Inspect the conv3 layer of block index 2 in fast_res5 (one of the two
# candidate target layers considered in the next cell).
model.fast_res5[2].conv3

Conv3d(64, 256, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)

In [8]:
# Layer handed to GradCAM in the next cell. Exactly one of the two assignments
# should be active: the fast_res5 variant is kept commented out as the alternative.
# target_layer = model.fast_res5[2].conv3
target_layer = model.slow_res5[2].conv3

In [9]:
# Wrap the network in GradCAM together with the chosen target layer.
# NOTE(review): this rebinds `model` to the wrapper. Re-running this cell, or
# any earlier cell that accesses model.<layer>, without first re-creating the
# network will then operate on the GradCAM object instead of the bare network.
model = GradCAM(model, target_layer)

In [10]:
# Second argument passed to the GradCAM wrapper.
# NOTE(review): presumably the index of the class to explain (the network was
# built with class_num=700) -- confirm against cam.GradCAM.
TARGET_CLASS = 268

# Only the first returned value (the activation map) is used further down.
cam, _ = model(clip, TARGET_CLASS)

In [11]:
# Presumably undoes the mean/std normalisation applied while loading the
# frames -- confirm against visualize.reverse_normalize.
# NOTE(review): `clip` is overwritten with the result, so running this cell
# twice applies reverse_normalize twice.
clip = reverse_normalize(clip)

In [12]:
# Combine the clip with the activation map computed above.
# NOTE(review): presumably overlays the CAM on every frame -- see visualize.visualize.
heatmaps = visualize(clip, cam)

In [13]:
# Display the shape of the visualisation result for comparison with clip.shape.
heatmaps.shape

torch.Size([1, 3, 240, 224, 224])

In [14]:
# Save every heat-map frame as a zero-padded PNG (cam000.png, cam001.png, ...).
import os

from torchvision.utils import save_image

# Destination folder. An earlier variant of this cell wrote to './motion_cam'
# (presumably for the fast_res5 target layer -- confirm); change the value here
# instead of keeping a commented-out copy of the whole loop.
OUTPUT_DIR = './semantic_cam'

# save_image does not create missing folders, so make sure the target exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Iterate over dim 2 of the result itself rather than relying on n_frames
# left over from the loading cell.
for i in range(heatmaps.shape[2]):
    heatmap = heatmaps[:, :, i].squeeze()
    save_image(heatmap, os.path.join(OUTPUT_DIR, 'cam{:03d}.png'.format(i)))

AttributeError: 'GradCAM' object has no attribute 'slowfast'