In [None]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import collections
import os
import shutil
import datetime
import keras_tuner as kt
import visualkeras
import pandas as pd 

from concurrent.futures import ThreadPoolExecutor, as_completed
from PIL import Image, UnidentifiedImageError
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.utils.class_weight import compute_class_weight


In [None]:
# Root folder of the image dataset; handed to image_dataset_from_directory when
# the train/held-out datasets are built.
dataset_directory = "dataset_livrable_1/"

In [None]:
# def is_image(filename):
#     try:
#         with Image.open(filename) as img:
#             img.verify()
#         return True
#     except (UnidentifiedImageError, OSError):
#         return False

# def move_non_images(directory):
#     dump_directory = "dump"
#     os.makedirs(dump_directory, exist_ok = True)
    
#     for folder, _, files in os.walk(directory):
#         for file in files:
#             file_path = os.path.join(folder, file)
#             if not is_image(file_path):
#                 print(f"Déplacement de {file_path} dans le dossier dump/")
#                 dest_path = os.path.join(dump_directory, file)
#                 try:
#                     shutil.move(file_path, dest_path)
#                 except:
#                     print("Erreur lors du déplacement")
                
# move_non_images(dataset_directory)

In [None]:
# TensorBoard run directory, suffixed with the launch timestamp so every run
# gets its own folder.
log_dir = f"logs/fit/{datetime.datetime.now():%Y%m%d-%H%M%S}"

# Target image size (pixels) and batch size used when loading the dataset.
image_h, image_w = 128, 128
batch_s = 16

# subset="both" makes the loader return two datasets at once: the training part
# and the 30% held-out part (bound to `test_set` here). The fixed seed keeps the
# split reproducible.
train_set, test_set = keras.utils.image_dataset_from_directory(
    dataset_directory,
    validation_split = 0.3,
    subset = "both",
    seed = 42,
    image_size = (image_h, image_w),
    batch_size = batch_s,
    label_mode = "int"
)

In [None]:
# Class names as exposed by the training dataset; later cells use them to size
# the output layer (num_classes) and to label the confusion matrix.
class_names = train_set.class_names
print(f"Classes détectées : {class_names}")

In [None]:
# tf.data.AUTOTUNE is the stable name of the constant previously reached through
# the deprecated tf.data.experimental namespace; it lets tf.data choose the
# prefetch buffer size.
AUTOTUNE = tf.data.AUTOTUNE

# Half of the held-out *batches* go to validation, the remainder stays as test
# (len() of a batched dataset counts batches, not images).
validation_size = int(0.5 * len(test_set))

# NOTE(review): train_set and test_set are rebound from themselves below, so this
# cell is not idempotent - re-running it without re-running the loading cell
# would split the already reduced test_set a second time.
train_set = train_set.cache().shuffle(1000).prefetch(buffer_size = AUTOTUNE)
validation_set = test_set.take(validation_size).cache().prefetch(buffer_size = AUTOTUNE)
test_set = test_set.skip(validation_size).cache().prefetch(buffer_size = AUTOTUNE)

# Number of output units needed by the classifier head.
num_classes = len(class_names)

In [None]:
def print_class_distribution(dataset, name, class_names = None):
    """Plot a bar chart of how many images each class has in `dataset`.

    Args:
        dataset: batched dataset yielding (images, label) pairs; it is unbatched
            and fully iterated to count labels.
        name: short name of the split, only used in the figure title.
        class_names: optional sequence mapping label id (its index) to a display
            name, e.g. the `class_names` attribute of the loaded dataset. When
            omitted, the historical hard-coded French names are used.

    Returns:
        None. The figure is shown with plt.show().
    """
    label_counts = collections.Counter(label.numpy() for _, label in dataset.unbatch())

    if class_names is None:
        # Legacy mapping; only correct if the dataset's label ids follow this order.
        classes = {0: "peintures", 1: "photos", 2: "schémas", 3: "croquis", 4: "textes scannés"}
    else:
        classes = dict(enumerate(class_names))

    total = sum(label_counts.values())

    # Sorted ids keep the bars in a stable order from one split to the next.
    label_ids = sorted(label_counts)
    labels = [classes.get(label_id, f"Classe inconnue ({label_id})") for label_id in label_ids]
    counts = [label_counts[label_id] for label_id in label_ids]
    percentages = [count / total * 100 for count in counts]

    plt.figure(figsize=(10, 6))
    bars = plt.bar(labels, counts)

    # Write each class share just above its bar.
    for bar, pct in zip(bars, percentages):
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width() / 2, height + 1, f"{pct:.1f}%", ha='center', va='bottom')

    plt.title(f"Répartition des classes ({name}_set - {total} images)")
    plt.xlabel("Classe")
    plt.ylabel("Nombre d'images")
    plt.xticks(rotation=15)
    plt.tight_layout()
    plt.show()
# print_class_distribution(train_set, "train")
# print_class_distribution(test_set, "test")
# print_class_distribution(validation_set, "validation")

In [None]:
def ModelTest(*args, **kwargs):
    """Build, summarise and compile a small CNN classifier.

    Positional and keyword arguments are forwarded unchanged to `Sequential`.
    The network rescales pixels by 1/255, applies two Conv/BatchNorm/MaxPool/
    Dropout stages (16 then 32 filters), a 64-filter convolution, and a dense
    head ending in a softmax over `num_classes` (read from the notebook globals).

    NOTE(review): no input shape is declared here, so unless the caller passes
    layers through *args the model has no known input shape when summary() is
    called - TODO confirm this behaves as expected on the Keras version in use.

    Returns:
        The compiled model (adam, sparse categorical cross-entropy, accuracy).
    """
    hidden_activation = 'relu'
    model = Sequential(*args, **kwargs)
    model.add(layers.Rescaling(1./255))

    # The two down-sampling stages differ only by their number of filters.
    for n_filters in (16, 32):
        model.add(layers.Conv2D(n_filters, (3, 3), padding = 'same', activation = hidden_activation))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.Dropout(0.1))

    model.add(layers.Conv2D(64, (3, 3), padding = 'same', activation = hidden_activation))

    # Classification head.
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation = hidden_activation))
    model.add(layers.Dropout(0.1))
    model.add(layers.Dense(num_classes, activation = 'softmax'))

    model.summary()
    model.compile(
        loss = 'sparse_categorical_crossentropy',
        optimizer = 'adam',
        metrics = ['accuracy']
    )
    return model

In [None]:
def ModelTest2(*args,**kwargs):
    """Build (without compiling) the second CNN variant.

    Positional and keyword arguments are forwarded unchanged to `Sequential`.
    Layers, in order: 1/255 rescaling; two Conv -> ZeroPadding -> MaxPool stages
    (32 then 64 filters); a 128-filter convolution; Flatten; Dense(64); Dropout;
    softmax over `num_classes` (read from the notebook globals).

    Returns:
        The uncompiled model; the caller is expected to compile it.
    """
    hidden_activation = 'relu'
    model = Sequential(*args, **kwargs)

    # Declare the whole stack up front, then attach it in order.
    stack = [
        layers.Rescaling(scale = 1./255),
        layers.Conv2D(32, (3, 3), padding = 'same', activation = hidden_activation),
        layers.ZeroPadding2D(padding = (1, 1)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), padding = 'same', activation = hidden_activation),
        layers.ZeroPadding2D(padding = (1, 1)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), padding = 'same', activation = hidden_activation),
        layers.Flatten(),
        layers.Dense(64, activation = hidden_activation),
        layers.Dropout(0.1),
        layers.Dense(num_classes, activation = 'softmax'),
    ]
    for layer in stack:
        model.add(layer)

    return model

In [None]:
# Instantiate the second architecture, print its summary, then compile it for
# integer-labelled multi-class classification.
modeltest = ModelTest2()
modeltest.summary()
modeltest.compile(
    loss = 'sparse_categorical_crossentropy',
    optimizer = 'adam',
    metrics = ['accuracy']
)

In [None]:
# TensorBoard logging into the timestamped run directory, with weight
# histograms recorded every epoch.
tensorboard_callback = TensorBoard(log_dir = log_dir, histogram_freq = 1)

# Keep the full model (not only weights) with the highest validation accuracy...
checkpoint_callback = ModelCheckpoint(
    filepath = 'checkpoints/best_model_acc.keras',
    monitor = 'val_accuracy',
    mode = 'max',
    save_best_only = True,
    save_weights_only = False,
    verbose = 1
)
# ...and, separately, the one with the lowest validation loss.
checkpoint_callback_2 = ModelCheckpoint(
    filepath = 'checkpoints/best_model_loss.keras',
    monitor = 'val_loss',
    mode = 'min',
    save_best_only = True,
    save_weights_only = False,
    verbose = 1
)
# Stop after 2 epochs without validation-accuracy improvement and roll back to
# the best weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    monitor = 'val_accuracy',
    mode = 'max',
    patience = 2,
    restore_best_weights = True
)

# Callback list consumed by train_model.
callbacks = [
    early_stopping,
    tensorboard_callback,
    checkpoint_callback,
    checkpoint_callback_2,
]

In [None]:
# Collect every training label (one full pass over the unbatched training set)
# so per-class weights can be derived from the class frequencies.
y_train = np.array(
    [sample_label.numpy() for _image, sample_label in train_set.unbatch()]
)
class_weights = compute_class_weight(
    class_weight = "balanced",
    classes = np.unique(y_train),
    y = y_train
)
# Map each class id to its weight, in the form expected by fit(class_weight=...).
weights_dict = dict(zip(np.unique(y_train), class_weights))
def train_model(model, train_set = train_set, test_set = test_set, weights = weights_dict, epochs = 10):
    """Fit `model`, save its history to 'history.csv' and plot the learning curves.

    Args:
        model: compiled Keras model reporting an 'accuracy' metric.
        train_set: dataset used for fitting. The default is bound when this
            function is defined, i.e. to the notebook's `train_set` at that time.
        test_set: dataset passed to fit() as `validation_data` (default bound at
            definition time as well).
        weights: class-id -> weight mapping forwarded as `class_weight`.
        epochs: maximum number of epochs; the global `callbacks` list is passed
            to fit() and may end training earlier.

    Returns:
        None.

    NOTE(review): the metrics computed on `test_set` are the ones monitored by
    the callbacks (early stopping, checkpoints), so that split is not an
    untouched hold-out - consider passing `validation_set` instead.
    """
    history = model.fit(
        train_set,
        validation_data = test_set,
        epochs = epochs,
        callbacks = callbacks,
        class_weight = weights
    )

    # Hand the path to pandas rather than a text handle opened without
    # newline='': that combination produces blank rows between records on Windows.
    pd.DataFrame(history.history).to_csv('history.csv')

    accuracy = history.history['accuracy']
    validation_accuracy = history.history['val_accuracy']
    loss = history.history['loss']
    validation_loss = history.history['val_loss']

    # Always size the x axis from the epochs actually run. Training can stop
    # before `epochs` is reached, and a range(epochs) x axis would then be longer
    # than the metric lists and make plt.plot raise.
    epochs_range = range(len(accuracy))

    plt.figure(figsize = (16, 8))
    plt.subplot(1, 2, 1)
    plt.plot(epochs_range, accuracy, label = 'Training Accuracy')
    plt.plot(epochs_range, validation_accuracy, label = 'Validation Accuracy')
    plt.legend(loc='lower right')
    plt.title("Training and Validation Accuracy")

    plt.subplot(1, 2, 2)
    plt.plot(epochs_range, loss, label = 'Training Loss')
    plt.plot(epochs_range, validation_loss, label = 'Validation Loss')
    plt.legend(loc='upper right')
    plt.title("Training and Validation Loss")

    plt.show()

In [None]:
# Short training run (at most 2 epochs) of the compiled model, then persist the
# model as it stands after training.
train_model(modeltest, epochs = 2)
modeltest.save('model.keras')


In [None]:
# modeltest.summary()
# Load a previously saved model from disk for the evaluation cells below.
# NOTE(review): this path is not the 'checkpoints/' location written by the
# ModelCheckpoint callbacks of this notebook - confirm the file exists before
# running on a fresh checkout.
model = load_model('../models/modelTest2/best_model_loss.keras')

In [None]:

# Materialise the validation split in memory: gather every batch, then stitch
# the batches together into one image array and one label array.
X_test, y_true = [], []

for images, labels in validation_set:
    X_test += [images]
    y_true += [labels]

X_test, y_true = np.concatenate(X_test), np.concatenate(y_true)
def display_matrix(model, X_test = X_test, y_true = y_true, class_names = class_names):
    """Predict on `X_test` and plot the confusion matrix against `y_true`.

    Args:
        model: model whose predict() returns one score per class for each sample.
        X_test: array of input images (default bound when this function is defined).
        y_true: integer class ids aligned with `X_test` (default bound at
            definition time).
        class_names: display names, indexed by class id, used as axis labels.

    Returns:
        None. The figure is shown with plt.show().
    """
    y_pred_proba = model.predict(X_test)
    y_pred = np.argmax(y_pred_proba, axis = 1)

    # Pin the label set to every known class. Otherwise a class absent from both
    # y_true and y_pred is dropped from the matrix, which then no longer lines
    # up with `class_names`.
    cm = confusion_matrix(y_true, y_pred, labels = np.arange(len(class_names)))

    # Named `matrix_display` so IPython's display() helper is not shadowed.
    matrix_display = ConfusionMatrixDisplay(cm, display_labels = class_names)
    matrix_display.plot(cmap = plt.cm.Blues)
    plt.title("Matrice de confusion")
    plt.xticks(rotation = 45)
    plt.show()

In [None]:
# Confusion matrix of the loaded model on the in-memory evaluation arrays.
display_matrix(model, X_test = X_test, y_true = y_true, class_names = class_names)

# Architecture diagram; left as the final expression so the notebook renders
# whatever layered_view returns.
visualkeras.layered_view(model, legend = True, show_dimension = True)

In [None]:
# Print the layer-by-layer summary of the model loaded with load_model.
model.summary()

In [None]:
# Per-epoch metrics of a 9-epoch run, hard-coded here (presumably copied from a
# training log - TODO confirm the source run).
acc = [0.6126, 0.8021, 0.8504, 0.8826, 0.9042, 0.9299, 0.9365, 0.9619, 0.9731]

loss = [0.8674, 0.4140, 0.3210, 0.2464, 0.2038, 0.1472, 0.1450, 0.0850, 0.0613]

val_acc = [0.7699, 0.8469, 0.8501, 0.8456, 0.8752, 0.8481, 0.8798, 0.8777, 0.8658]

val_loss = [0.4853, 0.3922, 0.3882, 0.4020, 0.3542, 0.4533, 0.3904, 0.4956, 0.6643]

epochs_range = range(len(acc))

plt.figure(figsize = (16, 8))

# Left panel: accuracy curves.
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label = 'Training Accuracy')
plt.plot(epochs_range, val_acc, label = 'Validation Accuracy')
plt.legend(loc='lower right')
plt.xlabel("Epoch")
plt.title("Training and Validation Accuracy")

# Right panel: loss curves.
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label = 'Training Loss')
plt.plot(epochs_range, val_loss, label = 'Validation Loss')
plt.legend(loc='upper right')
plt.xlabel("Epoch")
plt.title("Training and Validation Loss")

# Render explicitly, like the other plotting cells, instead of letting the cell
# echo the repr of the last plt.title() call.
plt.show()

In [None]:
# Reload the per-epoch metrics saved as CSV (first column is the epoch index)
# and preview the first rows.
df = pd.read_csv('history.csv', index_col = 0)
print(df.head())

# One panel per metric family: accuracy on the left, loss on the right.
plt.figure(figsize = (16, 8))
for panel_index, (train_col, val_col, train_label, val_label, legend_loc, panel_title) in enumerate(
    (
        ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy', 'lower right', "Training and Validation Accuracy"),
        ('loss', 'val_loss', 'Training Loss', 'Validation Loss', 'upper right', "Training and Validation Loss"),
    ),
    start = 1,
):
    plt.subplot(1, 2, panel_index)
    plt.plot(df[train_col], label = train_label)
    plt.plot(df[val_col], label = val_label)
    plt.legend(loc = legend_loc)
    plt.title(panel_title)
plt.show()
