In [None]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.utils import load_img, img_to_array

In [15]:
    # 使用VGG16进行猫狗图像分类并展示Grad-CAM的简单例子
# 加载预训练的VGG16模型，并对图片进行预处理
model = VGG16(weights='imagenet')
model.summary()

img_path = '../data/4.jpg'
img = load_img(img_path, target_size=(224, 224))
img_array = img_to_array(img)
img_array = np.expand_dims(img_array, axis=0)
img_array = preprocess_input(img_array)

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [16]:
# 接下来，进行图像分类，获取目标类别的ID和名称
preds = model.predict(img_array)
print('Predicted:', tf.keras.applications.vgg16.decode_predictions(preds, top=1)[0])

_, category_id, _ = tf.keras.applications.vgg16.decode_predictions(preds, top=1)[0][0]
category_name = model.output.op.inputs[0].op.name.split('/')[0] + '_' + str(category_id)

Predicted: [('n03272010', 'electric_guitar', 0.405172)]


In [17]:
# 接着，使用Keras的backend函数获取目标类别对应的卷积层输出，并计算出相应的梯度
conv_layer = model.get_layer('block5_conv3')
grad_model = tf.keras.models.Model([model.inputs], [conv_layer.output, model.output])
with tf.GradientTape() as tape:
    conv_output, predictions = grad_model(img_array)
    loss = predictions[:, category_id]

grads = tape.gradient(loss, conv_output)[0]

TypeError: Only integers, slices (`:`), ellipsis (`...`), tf.newaxis (`None`) and scalar tf.int32/tf.int64 tensors are valid indices, got 'electric_guitar'

In [None]:
# 然后，进行梯度的池化操作，并得到相应的权重
weights = np.mean(grads, axis=(0, 1))
cam = np.zeros(conv_output.shape[1:3], dtype=np.float32)

for i, w in enumerate(weights):
    cam += w * conv_output[0, :, :, i]

cam = cv2.resize(cam.numpy(), (224, 224))
cam = np.maximum(cam, 0)
heatmap = cam / np.max(cam)

In [None]:
# 最后，将热力图叠加到原始图像上，展示最终结果
img = cv2.imread(img_path)
heatmap = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_JET)
superimposed_img = heatmap * 0.4 + img
cv2.imwrite(category_name + '.jpg', superimposed_img)

plt.imshow(cv2.cvtColor(superimposed_img, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()
