# PIPELINE FINALE

## Initialisation de la pipeline

### 0.1 - Import des librairies nécessaires au code

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import collections
import os
import shutil
import datetime
import keras_tuner as kt
import pandas as pd
import visualkeras
import time

from concurrent.futures import ThreadPoolExecutor, as_completed
from PIL import Image, UnidentifiedImageError
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.preprocessing.image import ImageDataGenerator

### 0.2 - Définition des constantes

In [None]:
# ---------------------------------------------------------------- General
# Seed value intended for reproducible runs.
SEED = 42

# ---------------------------------------------------------------- Images
# Target height / width used when loading images, and the loading batch size.
IMAGE_H, IMAGE_W = 128, 128
BATCH_S = 16

# ---------------------------------------------------------------- Class indexes
# One integer per image category, numbered 0 to 4.
# NOTE(review): presumably the classifier's output order - confirm against training.
(
    PAINTING_IDX,
    PHOTO_IDX,
    SCHEMA_IDX,
    SKETCH_IDX,
    TEXT_IDX,
) = range(5)

# ---------------------------------------------------------------- Folders
DATASET_DIRECTORY = "dataset_livrable_1/"
PHOTOS_DIRECTORY = "final_pipeline/photos"
DENOISED_PHOTOS_DIRECTORY = "final_pipeline/denoised_photos"
MODEL_DIRECTORY = "models"

# ---------------------------------------------------------------- Model file names
CLASSIFICATION_MODEL_NAME = "classification_model.keras"
AUTOENCODER_MODEL_NAME = "autoencoder_model.keras"
CAPTIONNING_MODEL_NAME = "captionning_model.keras"

# ---------------------------------------------------------------- Model paths
# Each model file lives directly inside MODEL_DIRECTORY.
CLASSIFICATION_MODEL_PATH, AUTOENCODER_MODEL_PATH, CAPTIONNING_MODEL_PATH = (
    os.path.join(MODEL_DIRECTORY, model_name)
    for model_name in (
        CLASSIFICATION_MODEL_NAME,
        AUTOENCODER_MODEL_NAME,
        CAPTIONNING_MODEL_NAME,
    )
)

## Partie 1 : Classification

### 1.0 - Mise au propre des folders & tri des fichiers

In [None]:
def is_image(filename):
    """Return True if Pillow can open and verify ``filename`` as an image.

    Args:
        filename: Path of the file to check.

    Returns:
        bool: True when both ``Image.open`` and ``verify()`` succeed, False
        when the file cannot be read, is not recognised as an image, or
        fails verification.
    """
    try:
        with Image.open(filename) as img:
            # verify() checks the file for damage without decoding pixels.
            img.verify()
    except (UnidentifiedImageError, OSError, SyntaxError):
        # UnidentifiedImageError: format not recognised.
        # OSError: missing, unreadable or truncated file.
        # SyntaxError: some Pillow format plugins (e.g. the PNG checksum
        # check) report a broken file this way from verify(); without it a
        # single damaged file would abort the whole scan.
        return False
    return True
def _unique_destination(directory, filename):
    """Return a path for ``filename`` inside ``directory`` that is not taken yet."""
    stem, extension = os.path.splitext(filename)
    candidate = os.path.join(directory, filename)
    counter = 1
    while os.path.exists(candidate):
        candidate = os.path.join(directory, f"{stem}_{counter}{extension}")
        counter += 1
    return candidate


def move_non_images(directory):
    """Move every file under ``directory`` that is not a valid image to ``dump/``.

    The tree is walked recursively and each file is checked with
    ``is_image``. Files that fail the check are moved into a ``dump`` folder
    created in the current working directory. When a file with the same name
    is already in the dump folder, a numeric suffix is appended so that
    nothing gets overwritten.

    Args:
        directory: Root folder to scan.

    Returns:
        None. A failed move is reported on stdout and the scan continues.
    """
    dump_directory = "dump"
    os.makedirs(dump_directory, exist_ok=True)
    dump_root = os.path.abspath(dump_directory)

    for folder, subfolders, files in os.walk(directory):
        # Do not descend into the dump folder when it sits inside the tree
        # being scanned: its content has already been set aside.
        subfolders[:] = [
            name
            for name in subfolders
            if os.path.abspath(os.path.join(folder, name)) != dump_root
        ]
        if os.path.abspath(folder) == dump_root:
            continue

        for file in files:
            file_path = os.path.join(folder, file)
            if is_image(file_path):
                continue

            print(f"Déplacement de {file_path} dans le dossier dump/")
            dest_path = _unique_destination(dump_directory, file)
            try:
                shutil.move(file_path, dest_path)
            except OSError as error:
                # shutil.Error is a subclass of OSError, so this covers both
                # while letting unrelated exceptions surface.
                print(f"Erreur lors du déplacement : {error}")
def is_valid_image(path):
    """Check whether TensorFlow can read and decode the file at ``path``.

    Args:
        path: Path of the image file to test.

    Returns:
        tuple: ``(path, True)`` when the file is read and decoded without
        error, ``(path, False)`` when any exception is raised on the way.
    """
    try:
        # Only success or failure matters; the decoded tensor is discarded.
        tf.image.decode_image(tf.io.read_file(path), channels=3)
    except Exception:
        return (path, False)
    return (path, True)
def clean_corrupted_images(directory, extensions=("jpg", "jpeg", "png"), max_workers=8):
    """Delete the image files under ``directory`` that fail ``is_valid_image``.

    Args:
        directory: Root folder, scanned recursively.
        extensions: Tuple of lower-case file name endings to consider.
        max_workers: Number of threads used to check files in parallel.

    Returns:
        None. Progress and the number of deleted files are printed.
    """
    # Collect every file whose lower-cased name ends with a wanted extension.
    candidates = [
        os.path.join(parent, name)
        for parent, _, names in os.walk(directory)
        for name in names
        if name.lower().endswith(extensions)
    ]

    print(f"Scan de {len(candidates)} images dans {directory}")

    removed = 0
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = [pool.submit(is_valid_image, candidate) for candidate in candidates]
        # Handle results as soon as each check finishes.
        for finished in as_completed(pending):
            checked_path, readable = finished.result()
            if readable:
                continue
            try:
                os.remove(checked_path)
            except Exception as e:
                print(f"Erreur de suppression {checked_path} : {e}")
            else:
                removed += 1

    print(f"Vérification terminée : {removed} image(s) corrompue(s) supprimée(s).")

In [None]:
# Optional one-off dataset cleaning steps; uncomment to run them.
# move_non_images(DATASET_DIRECTORY)
# clean_corrupted_images(DATASET_DIRECTORY)

# Reset the output folders left over from a previous run.
# PHOTOS_DIRECTORY is currently excluded (commented out), so photos copied
# by an earlier run are kept.
for output_directory in [
    # PHOTOS_DIRECTORY,
    DENOISED_PHOTOS_DIRECTORY,
]:
    # Remove the folder and everything in it if it exists.
    if os.path.exists(output_directory):
        shutil.rmtree(output_directory)
    # Recreate it empty. makedirs also creates a missing parent folder
    # (e.g. "final_pipeline/"), where os.mkdir would raise FileNotFoundError.
    os.makedirs(output_directory, exist_ok=True)

### 1.1 - Import des données source

In [None]:
# Load every image found under DATASET_DIRECTORY as an unlabeled dataset,
# resized to (IMAGE_H, IMAGE_W) and grouped in batches of BATCH_S.
image_set = keras.utils.image_dataset_from_directory(
    DATASET_DIRECTORY,
    batch_size=BATCH_S,
    image_size=(IMAGE_H, IMAGE_W),
    label_mode=None,
    seed=SEED,
    validation_split=None,
    subset=None,
    # Shuffling must stay off: predictions are matched to `file_paths` by
    # position further down. An explicit False states that intent instead of
    # relying on None being falsy.
    shuffle=False,
)
# Paths of the loaded files, used later to map predictions back to files.
filepaths = image_set.file_paths

### 1.2 - Classification des données

In [None]:
# Restore the trained classifier stored at CLASSIFICATION_MODEL_PATH.
# Earlier model location, kept for reference: '../model_basic_cnn.keras'
classification_model = keras.models.load_model(CLASSIFICATION_MODEL_PATH)

In [None]:
# Run the classifier over the whole dataset.
predicts = classification_model.predict(image_set, verbose=1)
# For each row of the output, keep the index of its largest value.
y_pred = list(predicts.argmax(axis=1))

### 1.3 - Copie des photos dans un répertoire spécifique

In [None]:
# Pair each source file with its predicted class index.
images_preds = list(zip(filepaths, y_pred))
# Keep only the files predicted as photos.
photos_preds = [pair for pair in images_preds if pair[1] == PHOTO_IDX]

# shutil.copy does not create missing folders, so make sure the destination
# exists before copying.
os.makedirs(PHOTOS_DIRECTORY, exist_ok=True)

# NOTE(review): files are copied under their base name only, so two source
# files with the same name in different folders would overwrite each other.
for filepath, _ in photos_preds:
    filename = os.path.basename(filepath)
    dest_path = os.path.join(PHOTOS_DIRECTORY, filename)
    shutil.copy(filepath, dest_path)

## Partie 2 : Denoising des images

### 2.0 - Import des données

In [None]:
# Load the photos stored in PHOTOS_DIRECTORY as an unlabeled dataset,
# resized to (IMAGE_H, IMAGE_W) and grouped in batches of BATCH_S.
image_set = keras.utils.image_dataset_from_directory(
    directory=PHOTOS_DIRECTORY,
    batch_size=BATCH_S,
    image_size=(IMAGE_H, IMAGE_W),
    label_mode=None,
    seed=SEED,
    validation_split=None,
    subset=None,
    # Shuffling must stay off: denoised images are saved under the name found
    # at the same position in `file_paths`. An explicit False states that
    # intent instead of relying on None being falsy.
    shuffle=False,
)
# Paths of the loaded photos, used later to name the denoised outputs.
filepaths = image_set.file_paths

In [None]:
# Cast every batch to float32 and divide the pixel values by 255.
image_set = image_set.map(lambda pixels: tf.cast(pixels, tf.float32) / 255.0)
# Turn each batch into a NumPy array, then join them into one array along
# the first axis.
X = [batch.numpy() for batch in image_set]
image_set = np.concatenate(X)

### 2.1 - Denoising des images sources

In [None]:
# Restore the trained autoencoder stored at AUTOENCODER_MODEL_PATH
# (a ".keras" file).
autoencoder_model = keras.models.load_model(AUTOENCODER_MODEL_PATH)

In [None]:
# Pass all the photos through the autoencoder and keep its outputs.
denoised_images = autoencoder_model.predict(image_set, verbose=1)

In [None]:
# Preview one denoised image: the second one when available, otherwise the
# first, so a run that produced a single photo does not raise IndexError.
plt.imshow(denoised_images[min(1, len(denoised_images) - 1)])
plt.axis("off")
plt.show()

### 2.2 - Sauvegarde des images

In [None]:
# Make sure the output folder exists before writing into it.
os.makedirs(DENOISED_PHOTOS_DIRECTORY, exist_ok=True)

# Save each denoised image under the file name of the photo it came from.
for source_path, image in zip(filepaths, denoised_images):
    # Scale back to 0-255. Clip before the cast so that values outside
    # [0, 1] saturate instead of wrapping around in uint8.
    image = np.clip(image * 255, 0, 255).astype(np.uint8)
    # Create a PIL Image from the numpy array.
    pil_image = Image.fromarray(image)
    # The output format follows the extension of the original file name.
    filename = os.path.basename(source_path)
    dest_path = os.path.join(DENOISED_PHOTOS_DIRECTORY, filename)
    pil_image.save(dest_path)