# Fast Saliency Model (FSM3)
Implementation of the paper: **"A compact deep architecture for real-time saliency prediction"**.

paper URL: https://doi.org/10.1016/j.image.2022.116671

1- The ".ipynb" file is the implementation of the FSM3 model.

2- The ".h5" file contains the weights of the trained model.


- The project code was rewritten to run in Google Colab.
- Copy the files to your Google Drive, into the "Colab Notebooks" folder.
- To apply the model to your own images, copy them into the folder specified by "input_imgs_path".


# Use the following reference:

@article{zabihi2022compact,
  title={A compact deep architecture for real-time saliency prediction},
  author={Zabihi, Saman and Tavakoli, Hamed R and Borji, Ali and Mansoori, Eghbal},
  journal={Signal Processing: Image Communication},
  volume={104},
  pages={116671},
  year={2022},
  publisher={Elsevier}
}

Resources: https://github.com/cyberstray/fast-saliency-model/


In [12]:
# Ask whether the pretrained weight file is available on Google Drive.
# The reply is stripped and lower-cased so that answers such as "Y" or " y "
# are treated the same as "y" by the later `weight_in_gdrive == 'y'` check.
weight_in_gdrive = input('Is there weight file in your google drive (y/n)?').strip().lower()

Is there weight file in your google drive (y/n)?y


# Show Hardware Specification

In [13]:
#!cat /proc/meminfo

In [14]:
#!cat /proc/cpuinfo

# Define Utilities





In [15]:
!pip install -q git+https://github.com/tensorflow/examples.git
import tensorflow as tf
from tensorflow_examples.models.pix2pix import pix2pix
from __future__ import division
import cv2
import numpy as np
import scipy.io
import scipy.ndimage


# Size of the images fed to the network: number of rows and number of columns.
shape_r = shape_c = 224


In [16]:
def padding(img, shape_r=240, shape_c=320, channels=3):
    """Fit `img` into a zero-filled `shape_r` x `shape_c` canvas.

    The image is resized with its aspect ratio preserved so that it fills the
    canvas along one axis, and is centred along the other axis. Canvas pixels
    not covered by the resized image stay 0.

    Args:
        img: image array; only its first two dimensions (rows, columns) are
            inspected here.
        shape_r: number of rows of the output canvas.
        shape_c: number of columns of the output canvas.
        channels: number of channels of the canvas; 1 yields a 2-D canvas.

    Returns:
        A uint8 array of shape (shape_r, shape_c, channels), or
        (shape_r, shape_c) when `channels` is 1.
    """
    canvas_shape = (shape_r, shape_c) if channels == 1 else (shape_r, shape_c, channels)
    img_padded = np.zeros(canvas_shape, dtype=np.uint8)

    src_rows = img.shape[0]
    src_cols = img.shape[1]

    if src_rows / shape_r > src_cols / shape_c:
        # The image is relatively taller than the canvas: fill the full
        # height and centre horizontally.
        new_cols = (src_cols * shape_r) // src_rows
        resized = cv2.resize(img, (new_cols, shape_r))
        new_cols = min(new_cols, shape_c)
        left = (img_padded.shape[1] - new_cols) // 2
        img_padded[:, left:left + new_cols] = resized
    else:
        # Otherwise fill the full width and centre vertically.
        new_rows = (src_rows * shape_c) // src_cols
        resized = cv2.resize(img, (shape_c, new_rows))
        new_rows = min(new_rows, shape_r)
        top = (img_padded.shape[0] - new_rows) // 2
        img_padded[top:top + new_rows, :] = resized

    return img_padded


def preprocess_images(paths, shape_r, shape_c):
    """Load the images at `paths` into one float32 batch scaled to [0, 1].

    Each image is read with `cv2.imread`, fitted into a `shape_r` x `shape_c`
    three-channel canvas by `padding`, and divided by 255.

    Args:
        paths: sequence of image file paths.
        shape_r: number of rows of each output image.
        shape_c: number of columns of each output image.

    Returns:
        A float32 array of shape (len(paths), shape_r, shape_c, 3).

    Raises:
        OSError: if an image cannot be read.
    """
    ims = np.zeros((len(paths), shape_r, shape_c, 3), dtype=np.float32)
    for i, path in enumerate(paths):
        original_image = cv2.imread(path)
        if original_image is None:
            # cv2.imread reports a missing or undecodable file by returning
            # None; fail here with the offending path instead of crashing
            # later inside padding().
            raise OSError('Could not read image: %s' % path)
        padded_image = padding(original_image, shape_r, shape_c, 3)
        ims[i] = padded_image.astype(np.float32) / 255.0
    return ims


def _scale_to_255(values):
    """Rescale `values` linearly so that its maximum becomes 255.

    An array whose maximum is 0 is returned as all zeros instead of being
    divided by zero (which would fill the result with NaN).
    """
    peak = np.max(values)
    if peak == 0:
        return np.zeros_like(values)
    return values / peak * 255


def postprocess_predictions(pred, shape_r, shape_c, sigma=7):
    """Map a predicted map back to a `shape_r` x `shape_c` image.

    The prediction is normalised to a maximum of 255, resized with its aspect
    ratio preserved so that it covers the target size, centre-cropped to
    exactly `shape_r` x `shape_c`, smoothed with a Gaussian filter and
    normalised to a maximum of 255 again.

    Args:
        pred: predicted map; its first two dimensions are rows and columns.
        shape_r: number of rows of the returned image.
        shape_c: number of columns of the returned image.
        sigma: standard deviation of the Gaussian smoothing (default 7, the
            value that was previously hard-coded).

    Returns:
        The post-processed map as a float array with maximum 255, or all
        zeros when the prediction itself is all zeros.
    """
    predictions_shape = pred.shape
    rows_rate = shape_r / predictions_shape[0]
    cols_rate = shape_c / predictions_shape[1]
    pred = _scale_to_255(pred)
    if rows_rate > cols_rate:
        # Match the target height, then crop the surplus columns evenly.
        new_cols = (predictions_shape[1] * shape_r) // predictions_shape[0]
        pred = cv2.resize(pred, (new_cols, shape_r))
        left = (pred.shape[1] - shape_c) // 2
        img = pred[:, left:left + shape_c]
    else:
        # Match the target width, then crop the surplus rows evenly.
        new_rows = (predictions_shape[0] * shape_c) // predictions_shape[1]
        pred = cv2.resize(pred, (shape_c, new_rows))
        top = (pred.shape[0] - shape_r) // 2
        img = pred[top:top + shape_r, :]
    # scipy.ndimage.filters is a deprecated namespace; call the function from
    # scipy.ndimage directly.
    img = scipy.ndimage.gaussian_filter(img, sigma=sigma)
    return _scale_to_255(img)



# Define Fast Saliency Model

In [17]:
### create Encoder
# MobileNetV2 without its classification head serves as the backbone.
base_model = tf.keras.applications.MobileNetV2(
    input_shape=[shape_r, shape_c, 3],
    include_top=False,
)

# Backbone layers whose activations are tapped as skip features.
# Spatial sizes are given for the 224x224 input configured above.
layer_names = [
    'block_1_expand_relu',   # 112x112
    'block_3_expand_relu',   # 56x56
    'block_6_expand_relu',   # 28x28
    'block_13_expand_relu',  # 14x14
    'block_16_project',      # 7x7
]
layers = [base_model.get_layer(layer_name).output for layer_name in layer_names]

# Feature extractor: one input image, one output per tapped layer.
model = tf.keras.Model(inputs=base_model.input, outputs=layers)
model.trainable = True


In [18]:
import math
from keras import backend as K

def my_init(shape, dtype=None):
    """Return a normally distributed tensor created by the Keras backend.

    Args:
        shape: shape of the tensor to create.
        dtype: dtype forwarded to `K.random_normal`.

    Returns:
        The tensor produced by `K.random_normal` with a fixed integer seed.
    """
    # Random seeds must be integers; the previous value (0.2) was a float.
    return K.random_normal(shape, dtype=dtype, seed=0)


def kernel_init(shape, dtype=None):
    """Return a deterministic, normally distributed kernel as a NumPy array.

    Every call draws from a fresh generator seeded with 0, so two calls with
    the same `shape` return identical values. The values are the same as
    those produced by seeding the global NumPy generator with 0, but the
    global generator is no longer reset as a side effect of calling this
    function.

    Args:
        shape: shape of the kernel to create.
        dtype: accepted so the function can be used as a Keras
            `kernel_initializer`; it is not used.

    Returns:
        A float NumPy array of the requested shape drawn from a normal
        distribution with mean 0 and standard deviation 0.5.
    """
    rng = np.random.RandomState(0)
    return rng.normal(scale=0.5, size=shape).astype(float)


def Conv2d_cd2(input, nout):
    """Apply a 3x3 convolution combined with a summed-kernel difference term.

    A bias-free 3x3 ``Conv2D`` (ReLU activation, ``same`` padding, ``nout``
    filters, initialised with ``kernel_init``) is applied to ``input``. A
    second convolution of ``input`` uses a kernel obtained by summing a
    ``kernel_init`` array over both spatial axes and tiling that sum back to
    3x3. The function returns ``-(conv_out - 0.5 * diff_out)``.

    The difference kernel is built from ``_filter``, an array returned
    directly by ``kernel_init``; it is a separate object from the weights
    owned by the ``Conv2D`` layer.

    Args:
        input: input tensor; its last dimension is used as the number of
            input channels.
        nout: number of output channels.

    Returns:
        The combined output tensor described above.
    """
    input_shape =input.get_shape().as_list()
    out_channels = nout
    # Plain array with the layout (3, 3, in_channels, out_channels).
    _filter=kernel_init((3, 3, input_shape[-1], out_channels))
    conv1 = tf.keras.layers.Conv2D(out_channels, 3, padding='same', activation='relu', use_bias=False,
                                   kernel_initializer=kernel_init)
    out_normal=conv1(input)
    
    # This compares the constant 0.7 with 0, so the condition is always False
    # and the early return below never runs.
    # NOTE(review): the guard uses 0.7 while the blend at the end uses 0.5;
    # presumably both were meant to be the same factor - confirm.
    if math.fabs(0.7 - 0.0) < 1e-8:
        return out_normal 
    # Sum the kernel over its first spatial axis ...
    kernel_diff = tf.reduce_sum(input_tensor=_filter, axis=0, keepdims=True)
    print('/kernel_diff.shape:', kernel_diff.get_shape())
    # ... then over the second, leaving one value per (in, out) channel pair.
    kernel_diff = tf.reduce_sum(input_tensor=kernel_diff, axis=1, keepdims=True)
    print('/kernel_diff.shape:', kernel_diff.get_shape())
    # Repeat that summed value at all 3x3 spatial positions.
    kernel_diff = tf.tile(kernel_diff, [3, 3, 1, 1])
    print('/kernel_diff.shape:', kernel_diff.get_shape())

    # Convolve the input with the tiled kernel (stride 1, SAME padding).
    out_diff = tf.nn.conv2d(input=input, filters=tf.dtypes.cast(kernel_diff, tf.float32), strides=[1, 1, 1, 1], padding='SAME')
    # The sign of the blended result is flipped before returning.
    return -(out_normal - 0.5 * out_diff)


def CDC_BLOCK(x, nout):
  """Run `Conv2d_cd2` on `x` and batch-normalise the result.

  Args:
    x: input tensor.
    nout: number of output channels passed to `Conv2d_cd2`.

  Returns:
    The batch-normalised (last axis) output of `Conv2d_cd2`.
  """
  cdc_features = Conv2d_cd2(x, nout)
  batch_norm = tf.keras.layers.BatchNormalization(axis=-1)
  return batch_norm(cdc_features)


# Decoder: four pix2pix upsampling blocks (16 filters, kernel size 3), applied
# in order and each doubling the spatial size
# (7x7 -> 14x14 -> 28x28 -> 56x56 -> 112x112 for a 224x224 input).
up_stack2 = [pix2pix.upsample(16, 3) for _ in range(4)]


In [19]:
def FSM3():
  """Build the FSM3 model as a Keras functional model.

  The function reads the module-level `shape_r`, `shape_c`, `model`
  (encoder) and `up_stack2` (decoder upsampling layers). Because `up_stack2`
  is module-level, every call reuses the same upsampling layer objects.

  Returns:
    A `tf.keras.Model` that takes a (shape_r, shape_c, 3) image and has two
    outputs, both of which are the same final tensor.
  """
  inputs = tf.keras.layers.Input(shape=[shape_r, shape_c, 3])
  x = inputs

  # Downsampling through the model
  # `skips` holds the five encoder feature maps, shallowest first.
  skips = model(x)
  n=1
  # Each encoder feature map is reduced to 8 channels by a 3x3 convolution and
  # concatenated with a CDC_BLOCK output of n channels computed from it.
  # The numeric suffix is the spatial size for a 224x224 input.
  x112=tf.keras.layers.Conv2D(8, 3, padding='same', activation='relu')(skips[0])
  x112 = tf.keras.layers.Concatenate()([x112, CDC_BLOCK(x112, n)])
  x56=tf.keras.layers.Conv2D(8, 3, padding='same', activation='relu')(skips[1])
  x56 = tf.keras.layers.Concatenate()([x56, CDC_BLOCK(x56, n)])
  x28=tf.keras.layers.Conv2D(8, 3, padding='same', activation='relu')(skips[2])
  x28 = tf.keras.layers.Concatenate()([x28, CDC_BLOCK(x28, n)])
  x14=tf.keras.layers.Conv2D(8, 3, padding='same', activation='relu')(skips[3])
  x14 = tf.keras.layers.Concatenate()([x14, CDC_BLOCK(x14, n)])
  x7=tf.keras.layers.Conv2D(8, 3, padding='same', activation='relu')(skips[4])
  x7 = tf.keras.layers.Concatenate()([x7, CDC_BLOCK(x7, n)])
  # Fully connected branch: squeeze the deepest features to one channel,
  # flatten, map to 56*56 sigmoid units, reshape to a 56x56 map and upsample
  # it once.
  x71=tf.keras.layers.Conv2D(1, 1, padding='same', activation='relu')(x7)
  x71f=tf.keras.layers.Flatten()(x71)
  fc_out7 = tf.keras.layers.Dense(56*56, activation='sigmoid', use_bias=False)(x71f)
  fc2d = tf.keras.layers.Reshape((56, 56, 1)) (fc_out7)
  fc2d=pix2pix.upsample(1, 3)(fc2d)
  # Skip connections ordered from deepest to shallowest, matching up_stack2.
  skips2=[x14, x28, x56, x112]
  x=x7
  
  # Upsampling and establishing the skip connections
  for up, skip in zip(up_stack2, skips2):
    x = up(x)
    x = tf.keras.layers.Concatenate()([x, skip])

  #Central Difference Convolutional 
  # This is the last layers of the model
  last1 = tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu')
  last2 = tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu')
  last3 = tf.keras.layers.Conv2DTranspose(1, 3, strides=2,
      activation='relu', padding='same')  # 112x112 -> 224x224 for a 224x224 input

  # Merge the decoder output with the fully connected branch before the
  # final convolutions.
  x = tf.keras.layers.Concatenate()([x, fc2d])
  x = last1(x)
  x = last2(x)
  x = last3(x)
  # Both outputs are the same tensor.
  return tf.keras.Model(inputs=inputs, outputs=[x, x])


In [20]:
# Build the model and print its input and output shapes as a sanity check.
m = FSM3()

print(m.input_shape, m.output_shape, sep='\n')
#m.summary()

/kernel_diff.shape: (1, 3, 8, 1)
/kernel_diff.shape: (1, 1, 8, 1)
/kernel_diff.shape: (3, 3, 8, 1)
/kernel_diff.shape: (1, 3, 8, 1)
/kernel_diff.shape: (1, 1, 8, 1)
/kernel_diff.shape: (3, 3, 8, 1)
/kernel_diff.shape: (1, 3, 8, 1)
/kernel_diff.shape: (1, 1, 8, 1)
/kernel_diff.shape: (3, 3, 8, 1)
/kernel_diff.shape: (1, 3, 8, 1)
/kernel_diff.shape: (1, 1, 8, 1)
/kernel_diff.shape: (3, 3, 8, 1)
/kernel_diff.shape: (1, 3, 8, 1)
/kernel_diff.shape: (1, 1, 8, 1)
/kernel_diff.shape: (3, 3, 8, 1)
(None, 224, 224, 3)
[(None, 224, 224, 1), (None, 224, 224, 1)]


# Test Section

In [21]:
#download a sample image from URL and save in '/content/' folder
!wget --output-document=lena.png https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png

--2022-08-05 16:52:32--  https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png
Resolving upload.wikimedia.org (upload.wikimedia.org)... 91.198.174.208, 2620:0:862:ed1a::2:b
Connecting to upload.wikimedia.org (upload.wikimedia.org)|91.198.174.208|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 473831 (463K) [image/png]
Saving to: ‘lena.png’


2022-08-05 16:52:32 (18.8 MB/s) - ‘lena.png’ saved [473831/473831]



In [22]:
from google.colab import files
from google.colab import drive
import os

# Inference settings.
b_s = 1  # batch size
input_imgs_path = '/content/'

# Restore the trained weights from Google Drive when the user said they exist.
if weight_in_gdrive == 'y':
    drive.mount('/content/gdrive')
    m.trainable = True
    m.load_weights('/content/gdrive/My Drive/Colab Notebooks/FSM3_weights_salicon2017.h5')

# The model is only used for prediction from here on.
m.trainable = False

def generator_test(images, b_s):
    """Yield preprocessed image batches forever, cycling through `images`.

    The paths are visited in sorted order. A sorted copy is used, so the
    caller's sequence is not modified. After the last path the generator
    wraps around to the start; the final batch of a pass may hold fewer than
    `b_s` images.

    Args:
        images: sequence of image file paths.
        b_s: batch size (at least 1).

    Yields:
        Batches produced by `preprocess_images` using the module-level
        `shape_r` and `shape_c`.

    Raises:
        ValueError: if `images` is empty or `b_s` is smaller than 1.
    """
    if b_s < 1:
        raise ValueError('b_s must be at least 1')
    if not images:
        # Without this check the wrap-around below divides by zero.
        raise ValueError('generator_test needs at least one image path')
    ordered = sorted(images)
    counter = 0
    while True:
        yield preprocess_images(ordered[counter:counter + b_s], shape_r, shape_c)
        counter = (counter + b_s) % len(ordered)

# List the folder once so that `images` and `file_names` always stay aligned.
# Results are written into the same folder as "output_*.png"; those files are
# skipped here so a second run does not treat earlier outputs as new inputs.
# The extension check is case-insensitive.
file_names = sorted(
    f for f in os.listdir(input_imgs_path)
    if f.lower().endswith(('.jpg', '.jpeg', '.png')) and not f.startswith('output_')
)
images = [os.path.join(input_imgs_path, f) for f in file_names]

predictions = []
print("Predicting for", len(file_names), ' images in path: '+ input_imgs_path)
if file_names:
    # `steps` must be a whole number: round the batch count up so that a
    # final partial batch is still predicted.
    n_steps = (len(file_names) + b_s - 1) // b_s
    # Model.predict accepts generators; predict_generator is deprecated.
    predictions = m.predict(generator_test(images, b_s), steps=n_steps)
    # The model returns the same map twice; keep the first output.
    predictions = predictions[0]
print('Done!')


print('-- Writing output files with "output" prefix')
for pred, name in zip(predictions, file_names):
    # Read the source image as grayscale only to obtain its original size.
    original_image = cv2.imread(os.path.join(input_imgs_path, name), 0)
    res = postprocess_predictions(pred, original_image.shape[0], original_image.shape[1])
    # splitext handles extensions of any length (name[0:-4] left a stray '.'
    # for ".jpeg" files).
    out_name = 'output_' + os.path.splitext(name)[0] + '.png'
    # Write an explicit 8-bit image rather than an int64 array.
    cv2.imwrite(os.path.join(input_imgs_path, out_name), np.clip(res, 0, 255).astype(np.uint8))
print('Done! the output files are in folder: '+input_imgs_path)



Mounted at /content/gdrive
Predicting for 2  images in path: /content/




Done!
-- Writing output files with "output" prefix
Done! the output files are in folder: /content/
