# Backbone Preprocessing

- Define a feature extractor model using one of the pretrained backbones: ResNet50V2 or InceptionResNetV2
- Read the image dataset and run the model on each image
- Store the output in the folder for the corresponding backbone and split (train or test)

## Setup

In [1]:
import pandas as pd
import numpy as np
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf

## Define the feature extractor model

In [2]:
class FeatureExtractor(tf.keras.Model):
    """
        Frozen ImageNet-pretrained CNN used to turn a single image into a
        flat feature vector.

        The backbone is created without its classification head
        (include_top=False) and with global average pooling (pooling='avg').
    """

    def __init__(self, model_name):
        """
            Initialize Feature extractor with a pretrained CNN model

            Args:
                model_name: name of the pretrained CNN model ["resnet", "inception_resnet"]

            Raises:
                NameError: if model_name is not one of the supported names
        """
        super().__init__()
        if model_name == "resnet":
            from tensorflow.keras.applications.resnet_v2 import ResNet50V2, preprocess_input
            self.model = ResNet50V2(include_top=False, weights='imagenet', pooling='avg')
            self.model_input_size = (224, 224)
        elif model_name == "inception_resnet":
            from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input
            self.model = InceptionResNetV2(include_top=False, weights='imagenet', pooling='avg')
            self.model_input_size = (299, 299)
        else:
            raise NameError('Invalid pretrained model name - must be one of ["resnet", "inception_resnet"]')

        # Keep the preprocessing function that matches the selected backbone
        self.preprocess_input = preprocess_input
        # The backbone is only used for inference, so freeze its weights
        self.model.trainable = False

    def call(self, inputs):
        """
            Call the pretrained CNN model to predict the features for a given input image

            Args:
                inputs: input image tensor for a single image (no batch axis).
                    NOTE(review): assumed to hold float pixel values in [0, 1]
                    and to be laid out as (height, width, channels) - confirm
                    against the caller.

            Returns:
                Flattened 1-D NumPy array with the backbone output for the image
        """
        # Scale pixel values back up by 255 before handing them to the
        # backbone-specific preprocess_input function
        inputs = inputs * 255
        # Resize inputs to the expected input size
        inputs = tf.image.resize(inputs, self.model_input_size)
        # Add the leading batch axis expected by the backbone
        inputs = inputs[tf.newaxis, :]
        preprocessed_input = self.preprocess_input(inputs)
        # Call the backbone directly instead of Model.predict(): predict() is
        # meant for large batched inputs and adds per-call overhead (and
        # progress output) when invoked once per image.
        features = self.model(preprocessed_input, training=False)
        return features.numpy().ravel()

## Define input and output folders

In [3]:
# Output root: holds split.csv and receives one sub-folder per backbone
dataset_path = '../data/processed/Dataset'
# Input root: RGB frames and label files, read from its 'features' and 'labels' sub-folders
image_dataset_path = '../data/processed/ImageDatasetRGB'

In [4]:
# Alphabetically sorted entry names of the 'features' folder (one entry per
# video) and of the 'labels' folder (one label file per video).
videos, labels = (
    sorted(os.listdir(os.path.join(image_dataset_path, subfolder)))
    for subfolder in ('features', 'labels')
)

In [5]:
# Train/test assignment table; the extraction loops look videos up through
# its 'name' column and read their subset from its 'set' column.
split = pd.read_csv(
    filepath_or_buffer=os.path.join(dataset_path, 'split.csv'),
)

## ResNet Feature Extraction

In [6]:
# Feature extractor backed by the "resnet" pretrained model
feature_extractor = FeatureExtractor(model_name='resnet')

In [7]:
# Root output folder for the ResNet features
resnet_path = os.path.join(dataset_path, 'resnet')
# exist_ok=True keeps the cell re-runnable: os.mkdir would raise
# FileExistsError if the folder is already there from a previous run.
os.makedirs(resnet_path, exist_ok=True)

In [8]:
# Create <resnet_path>/{train,test}/{features,labels}.
# os.makedirs also creates the intermediate train/test folders, and
# exist_ok=True keeps the cell re-runnable when the folders already exist.
for subset_name in ('train', 'test'):
    for content_name in ('features', 'labels'):
        os.makedirs(os.path.join(resnet_path, subset_name, content_name), exist_ok=True)

In [9]:
# Run the feature extractor on every frame of every video and store the
# per-video feature matrix and label table in the train or test folder.
# NOTE(review): videos and labels are paired purely by their position in the
# two sorted listings - confirm that both folders sort into the same order.
for video, label in zip(videos, labels):
    frames_dir = os.path.join(image_dataset_path, 'features', video)
    features = []
    # Frames are processed in sorted file-name order
    for frame_name in sorted(os.listdir(frames_dir)):
        frame = tf.io.read_file(os.path.join(frames_dir, frame_name))
        frame = tf.image.decode_image(frame, channels=3)
        # Convert to float32 (integer images are rescaled to [0, 1])
        frame = tf.image.convert_image_dtype(frame, tf.float32)
        features.append(feature_extractor(frame))

    # One row per frame
    video_features = np.array(features)
    video_labels = pd.read_csv(os.path.join(image_dataset_path, 'labels', label))

    # Look the video up explicitly instead of relying on the truth value of a
    # NumPy array: a missing or duplicated entry would otherwise either be
    # silently routed to 'test' or fail with an opaque NumPy error.
    video_sets = split.loc[split['name'] == video, 'set'].values
    if len(video_sets) != 1:
        raise ValueError(
            f'Expected exactly one row for video {video!r} in split.csv, found {len(video_sets)}'
        )
    # Anything that is not marked 'train' goes to the test folder
    subset = 'train' if video_sets[0] == 'train' else 'test'
    output_folder = os.path.join(resnet_path, subset)

    np.save(os.path.join(output_folder, 'features', video + '.npy'), video_features)
    video_labels.to_csv(os.path.join(output_folder, 'labels', video + '.csv'), index=False)
    # Progress indicator: one dot per processed video
    print('.', end='')

print('\n')

## InceptionResNet Feature Extraction

In [6]:
# Feature extractor backed by the "inception_resnet" pretrained model
feature_extractor = FeatureExtractor(model_name='inception_resnet')

In [7]:
# Root output folder for the InceptionResNet features
inception_resnet_path = os.path.join(dataset_path, 'inception_resnet')
# exist_ok=True keeps the cell re-runnable: os.mkdir would raise
# FileExistsError if the folder is already there from a previous run.
os.makedirs(inception_resnet_path, exist_ok=True)

In [8]:
# Create <inception_resnet_path>/{train,test}/{features,labels}.
# os.makedirs also creates the intermediate train/test folders, and
# exist_ok=True keeps the cell re-runnable when the folders already exist.
for subset_name in ('train', 'test'):
    for content_name in ('features', 'labels'):
        os.makedirs(os.path.join(inception_resnet_path, subset_name, content_name), exist_ok=True)

In [None]:
# Run the feature extractor on every frame of every video and store the
# per-video feature matrix and label table in the train or test folder.
# NOTE(review): videos and labels are paired purely by their position in the
# two sorted listings - confirm that both folders sort into the same order.
for video, label in zip(videos, labels):
    frames_dir = os.path.join(image_dataset_path, 'features', video)
    features = []
    # Frames are processed in sorted file-name order
    for frame_name in sorted(os.listdir(frames_dir)):
        frame = tf.io.read_file(os.path.join(frames_dir, frame_name))
        frame = tf.image.decode_image(frame, channels=3)
        # Convert to float32 (integer images are rescaled to [0, 1])
        frame = tf.image.convert_image_dtype(frame, tf.float32)
        features.append(feature_extractor(frame))

    # One row per frame
    video_features = np.array(features)
    video_labels = pd.read_csv(os.path.join(image_dataset_path, 'labels', label))

    # Look the video up explicitly instead of relying on the truth value of a
    # NumPy array: a missing or duplicated entry would otherwise either be
    # silently routed to 'test' or fail with an opaque NumPy error.
    video_sets = split.loc[split['name'] == video, 'set'].values
    if len(video_sets) != 1:
        raise ValueError(
            f'Expected exactly one row for video {video!r} in split.csv, found {len(video_sets)}'
        )
    # Anything that is not marked 'train' goes to the test folder
    subset = 'train' if video_sets[0] == 'train' else 'test'
    output_folder = os.path.join(inception_resnet_path, subset)

    np.save(os.path.join(output_folder, 'features', video + '.npy'), video_features)
    video_labels.to_csv(os.path.join(output_folder, 'labels', video + '.csv'), index=False)
    # Progress indicator: one dot per processed video
    print('.', end='')

print('\n')