In [2]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.applications import imagenet_utils
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
import tensorflow as tf

import numpy as np
import imutils
import cv2
import matplotlib.cm as cm

In [3]:
base = EfficientNetB0(input_shape=(224,224,3),include_top=False, weights="imagenet")
inp = base.inputs
x = base.output
x = GlobalAveragePooling2D()(x)
x = Dense(32, activation='relu')(x)
x = Dense(1, activation='sigmoid')(x)
model = Model(inputs=inp,outputs=x)
model.summary()

In [6]:
orig = cv2.imread('beagle.jpg')
resized = cv2.resize(orig,(224,224))

In [8]:
image = load_img('beagle.jpg', target_size = (224,224))
image = img_to_array(image)
print(image.shape)
image = np.expand_dims(image,axis=0)
print(image[0,0,0])
image = imagenet_utils.preprocess_input(image)
print(image[0,0,0])

(224, 224, 3)
[187. 184. 146.]
[42.060997 67.221    63.32    ]


In [10]:
preds = model.predict(image)
i = np.argmax(preds[0])
i



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


0

In [11]:
class GradCAM:
    def __init__(self, model, classIdx, layerName =None):
        """"store the model, the class index used to measure the class
        activation map and the layer to be used when visualizing
        the class activation map"""
        self.model = model
        self.classIdx = classIdx
        self.layerName = layerName
        #if the layer name is None, attemp to automatically find the target output layer
        if self.layerName is None:
            self.layerName = self.find_target_layer()
            print(self.layerName)
    def find_target_layer(self):
        """attempt to find the final convolutional layer in the network
        #by looping over the layer of the network in reverse order"""
        for layer in reversed(self.model.layers):
            #check to see if the layer has a 4D ouput
            if len(layer.output.shape) ==4:
                return layer.name
        # If we can not find the 4D layer raise the valueError
        raise ValueError("Could not find the 4D layer, cannot apply GradCam.")
    def compute_heatmap(self, image, eps=1e-8):
        """construct our gradient model by supplying the inputs
            to our pre-trained model, the output of the (presumably) final
            4D layer in the network and the ouput of the softmax activation
            from the model"""
        gradModel = Model(inputs=[self.model.inputs], 
                          outputs=[self.model.get_layer(self.layerName).output,
                                  self.model.output])
        #record operations for automatic differentiation
        with tf.GradientTape() as tape:
            """Cast the image tensor to a float-32 data type,
            pass the image through the gradient model, and grab
            the loss associated with the specific class index"""
            inputs = tf.cast(image,tf.float32)
            (convOutputs, predictions) = gradModel(inputs)
            loss = predictions[:,self.classIdx]
        # use automatic differentiation to compute the gradients
        grads = tape.gradient(loss, convOutputs)

        #compute the guided gradients
        castConvOutputs = tf.cast(convOutputs>0,"float32")
        castGrads = tf.cast(grads>0,'float32')
        guidedGrads = castConvOutputs*castGrads*grads
        #grab the volume and discard the batch
        convOutputs = convOutputs[0]
        guidedGrads = guidedGrads[0]
        #compute the average of the gradient values and using them as
        #weights, compute the ponderation of the filters with respect
        #to the weights
        weights = tf.reduce_mean(guidedGrads, axis=(0,1))
        cam = tf.reduce_sum(tf.multiply(weights,convOutputs),axis=-1)
        print(cam)
        #grab the spatial dimenstions of the input image and resize 
        #the output class activation map to match the input image dimensions
        (w,h) = (image.shape[2], image.shape[1])
        heatmap = cv2.resize(cam.numpy(),(w,h))

        # normalize the heatmap such that all value lie in the range [0,1]
        #scale the resulting values to the range [0,255] and then convert
        #to an unsigned 8-bit interger
        numer = heatmap - np.min(heatmap)
        denom = (heatmap.max() - heatmap.min()) + eps
        heatmap = numer/denom
        heatmap = (heatmap*255).astype('uint8')
        #return the resulting heatmap to the calling function
        return heatmap
    def overlay_heatmap(self, heatmap, image, alpha=0.5,
        colormap=cv2.COLORMAP_JET):
        # apply the supplied color map to the heatmap and then
        # overlay the heatmap on the input image
        heatmap = cv2.applyColorMap(heatmap, colormap)
        output = cv2.addWeighted(image, alpha, heatmap, 1 - alpha, 0)

        # return a 2-tuple of the color mapped heatmap and the output,
        # overlaid image
        return (heatmap, output)

In [14]:
# initialize our gradient class activation map and build the heatmap
cam = GradCAM(model, i)
heatmap = cam.compute_heatmap(image)

top_activation




tf.Tensor(
[[-0.00371769 -0.00348559 -0.00344578 -0.00219665 -0.00287046 -0.00094495
  -0.00057774]
 [-0.00347897 -0.00347633 -0.00095249  0.0123751   0.00587874 -0.00118047
  -0.00132469]
 [-0.00332633 -0.0016963   0.01786269  0.04435014  0.03145295  0.00088659
  -0.00226897]
 [-0.00168266  0.00307369  0.01601163  0.02236929  0.01675005  0.00034168
  -0.0027207 ]
 [ 0.00439809  0.0157595   0.02168857  0.01673638  0.01685585  0.00229228
   0.0004368 ]
 [ 0.00472195  0.01108543  0.01432847  0.01317955  0.01715365  0.00410251
   0.00074184]
 [ 0.00019223  0.00050221  0.00131629  0.00229584  0.00503218  0.00194924
   0.00018167]], shape=(7, 7), dtype=float32)


In [21]:
# resize the resulting heatmap to the original input image dimensions
# and then overlay heatmap on top of the image
heatmap = cv2.resize(heatmap, (orig.shape[1], orig.shape[0]))
(heatmap, output) = cam.overlay_heatmap(heatmap, orig, alpha=0.5)
output

array([[[167,  92,  94],
        [167,  92,  94],
        [167,  92,  94],
        ...,
        [124,  60,  90],
        [124,  61,  89],
        [124,  62,  88]],

       [[167,  92,  94],
        [167,  92,  94],
        [167,  92,  94],
        ...,
        [124,  60,  90],
        [124,  61,  89],
        [124,  62,  88]],

       [[167,  92,  94],
        [167,  92,  94],
        [167,  92,  94],
        ...,
        [124,  60,  90],
        [124,  61,  89],
        [124,  62,  88]],

       ...,

       [[194, 100, 118],
        [194, 100, 118],
        [194, 100, 118],
        ...,
        [212, 109, 120],
        [212, 109, 120],
        [212, 109, 120]],

       [[194, 100, 118],
        [194, 100, 118],
        [194, 100, 118],
        ...,
        [212, 108, 118],
        [212, 108, 118],
        [212, 108, 118]],

       [[194, 100, 118],
        [194, 100, 118],
        [194, 100, 118],
        ...,
        [211, 108, 118],
        [211, 108, 118],
        [211, 108, 118]]

In [20]:
# draw the predicted label on the output image
cv2.rectangle(output, (0, 0), (340, 40), (0, 0, 0), -1)
cv2.putText(output, "OK", (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)

# display the original image and resulting heatmap and output image
# to our screen
output = np.vstack([orig, heatmap, output])
output = imutils.resize(output, height=700)
cv2.imshow("Output", output)
cv2.waitKey(0)
cv2.destroyAllWindows()