In [None]:
import tensorflow as tf
import pathlib
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd

## Move posters into the `<category>/<genre>/` folders expected by the data loader

In [None]:
# Reorganise the flat poster folder into <category>/<genre>/<file> sub-folders,
# driven by the (name, category, genre) rows of movefiles.csv. Posters that are
# not listed in the CSV end up in the 'autres' folder.
source = pathlib.Path('../data/posters')
movefiles = pd.read_csv('../data/movefiles.csv')

# Create every category/genre combination. exist_ok=True keeps the cell
# re-runnable: a bare mkdir() raises FileExistsError on the second execution.
for cat in movefiles.category.unique():
    p = source/cat
    p.mkdir(exist_ok=True)
    for genre in movefiles.genre.unique():
        (p/genre).mkdir(exist_ok=True)

# Catch-all folder for posters without an entry in the CSV.
p = source/'autres'
p.mkdir(exist_ok=True)

# Move each listed poster that is still at the top level of `source`.
# Files already moved by a previous run no longer exist there and are skipped.
for name, category, genre in zip(movefiles['name'], movefiles['category'], movefiles['genre']):
    s = source/name
    if s.exists():
        s.replace(source/category/genre/name)

# Whatever .jpg is left at the top level was not listed in the CSV.
# The glob is materialised first so the directory is not modified while it is
# still being iterated.
for autre in list(source.glob('*.jpg')):
    autre.replace(source/'autres'/autre.name)

## Data loader

In [None]:
# Folder that holds the train/, val/ and test/ sub-folders read below.
data_dir = pathlib.Path('../data/posters')

AUTOTUNE = tf.data.experimental.AUTOTUNE

BATCH_SIZE = 32
IMG_HEIGHT = 256
IMG_WIDTH = 256

# Number of entries matching <split>/*/* for each split.
NUMBER_TRAIN_SAMPLES, NUMBER_VAL_SAMPLES, NUMBER_TEST_SAMPLES = (
    len(list((data_dir/split).glob('*/*'))) for split in ('train', 'val', 'test')
)

# Number of batches needed to cover each split once (last batch may be partial).
STEP_SIZE_TRAIN, STEP_SIZE_VAL, STEP_SIZE_TEST = (
    int(np.ceil(count/BATCH_SIZE))
    for count in (NUMBER_TRAIN_SAMPLES, NUMBER_VAL_SAMPLES, NUMBER_TEST_SAMPLES)
)

# Class-directory names; get_label compares a file's parent folder against these.
CLASS_NAMES = np.array([
    'Action',
    'Animation',
    'Comédie',
    'Comédie-dramatique',
    'Documentaire',
    'Drame',
    'Thriller-Policier',
])

def get_label(file_path):
    """Return the label of a file as a boolean mask over CLASS_NAMES.

    The path is split on the OS path separator and its second-to-last
    component (the folder containing the file) is compared element-wise
    with CLASS_NAMES.
    """
    class_dir = tf.strings.split(file_path, os.path.sep)[-2]
    return class_dir == CLASS_NAMES

def decode_img(img):
    """Decode JPEG bytes into a 3-channel image resized to the model input size.

    Args:
        img: the raw, still-compressed JPEG content as a string tensor.

    Returns:
        The decoded image resized to IMG_HEIGHT x IMG_WIDTH with Lanczos-3
        interpolation.
    """
    # convert the compressed string to a 3D uint8 tensor
    img = tf.image.decode_jpeg(img, channels=3)
    # tf.image.resize takes the target size as [new_height, new_width]; the
    # previous [IMG_WIDTH, IMG_HEIGHT] order only worked because both are equal.
    return tf.image.resize(img, [IMG_HEIGHT, IMG_WIDTH], method='lanczos3')

def process_path(file_path):
    """Turn a poster file path into an (image, label) pair.

    The file content is read as a raw string and decoded with decode_img;
    the label is derived from the path itself with get_label.
    """
    raw_bytes = tf.io.read_file(file_path)
    return decode_img(raw_bytes), get_label(file_path)

In [None]:
from classification_models.tfkeras import Classifiers

# Fetch the 'resnet18' entry from Classifiers. The result is unpacked into the
# model constructor and the input-preprocessing function that prepare_data
# applies to the images.
ResNet18, preprocess_input = Classifiers.get('resnet18')

def augment(image, label):
    """Apply random training-time augmentation to one (image, label) pair.

    The image gets a random horizontal flip, random hue / saturation /
    brightness / contrast changes, and a random translation obtained by
    padding the image and cropping it back to its original size. Vertical
    flips and 90-degree rotations are not applied.

    Args:
        image: image tensor of shape (IMG_HEIGHT, IMG_WIDTH, 3).
        label: label tensor, returned unchanged.

    Returns:
        The (augmented_image, label) pair.
    """
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_hue(image, 0.1)
    image = tf.image.random_saturation(image, 0.5, 2)
    # NOTE(review): a max delta of 0.3 is negligible if pixel values are on a
    # 0-255 scale rather than 0-1 -- confirm the intended value range.
    image = tf.image.random_brightness(image, 0.3)
    image = tf.image.random_contrast(image, 0.2, 1.8)
    # Pad by `padding` pixels in total along each axis, then take a random
    # crop of the original size: this shifts the poster by up to `padding`
    # pixels. Both calls take height before width; the previous
    # (width, height) order only worked because the two sizes are equal.
    padding = 50
    image = tf.image.resize_with_crop_or_pad(image, IMG_HEIGHT+padding, IMG_WIDTH+padding)
    image = tf.image.random_crop(image, size=[IMG_HEIGHT, IMG_WIDTH, 3])
    return image, label


def prepare_data(ds, phase, shuffle_buffer_size=1000):
    """Build the batched input pipeline for one split.

    Args:
        ds: dataset of (image, label) pairs.
        phase: name of the split; random augmentation is applied only when
            it equals 'train'.
        shuffle_buffer_size: size of the shuffle buffer.

    Returns:
        A shuffled, endlessly repeating dataset of preprocessed batches of
        BATCH_SIZE elements, with background prefetching.
    """
    # NOTE(review): shuffling and repeating are applied to every phase, so
    # consumers must bound iteration themselves (e.g. with `steps=`) and
    # cannot rely on the element order of the 'val' / 'test' pipelines.
    ds = ds.shuffle(buffer_size=shuffle_buffer_size)
    ds = ds.repeat()
    if phase == 'train':
        ds = ds.map(augment, num_parallel_calls=AUTOTUNE)
    # Dataset.map returns a new dataset: the result must be assigned back,
    # otherwise preprocess_input is silently never applied.
    ds = ds.map(lambda img, l: (preprocess_input(img), l), num_parallel_calls=AUTOTUNE)
    ds = ds.batch(BATCH_SIZE)
    # `prefetch` lets the dataset fetch batches in the background while the model
    # is training.
    ds = ds.prefetch(buffer_size=AUTOTUNE)
    return ds

In [None]:
# For each split: list the files under <split>/*/*, map every path to an
# (image, label) pair, then wrap the result in the batched input pipeline.
list_ds, labeled_ds, dataset = {}, {}, {}
for split in ['train', 'val', 'test']:
    list_ds[split] = tf.data.Dataset.list_files(str(data_dir/split/'*/*'))
    labeled_ds[split] = list_ds[split].map(process_path, num_parallel_calls=AUTOTUNE)
    dataset[split] = prepare_data(labeled_ds[split], split)

## Feature extraction

In [None]:
# Load the saved classifier and expose the output of its
# 'global_average_pooling2d' layer as a feature vector.
model = tf.keras.models.load_model('../data/final_model.h5')
pooled_output = model.get_layer('global_average_pooling2d').output
features_extractor = tf.keras.models.Model(inputs=model.input, outputs=pooled_output)
features_extractor.summary()

In [None]:
# Run the feature extractor over the train and test pipelines for the number
# of batches that covers each split once.
# NOTE(review): both pipelines are shuffled and repeated (and 'train' is
# augmented), so the feature rows are in an unknown order and their count is
# steps * BATCH_SIZE rather than the number of samples -- they cannot be
# matched to labels as is.
train_features, test_features = (
    features_extractor.predict(dataset[split], steps=n_steps)
    for split, n_steps in (('train', STEP_SIZE_TRAIN), ('test', STEP_SIZE_TEST))
)

## À compléter en se basant sur le notebook resnet_knn
## Attention : on ne peut pas utiliser les mêmes fonctions de visualisation pour les posters ; se référer au notebook transfer learning