In [None]:
from os import getcwd, listdir, path as ospath, walk as oswalk
import keras

In [None]:
import kagglehub

In [None]:

# Download the latest version of the CASIA 2.0 tampering dataset through kagglehub.
# The returned value is used below as the local directory holding the files.
path = kagglehub.dataset_download("divg07/casia-20-image-tampering-detection-dataset")
# Root folder passed to prepare_data(); the 'Au' and 'Tp' sub-folders are listed below.
# NOTE(review): if `path` is already absolute, os.path.join drops the getcwd() prefix.
fqPath = ospath.join(getcwd(), path, 'CASIA2')
print(listdir(fqPath+"/Au"))
print(listdir(fqPath+"/Tp"))

# Download (CoMoFoD) - disabled here; the same download is performed in a later cell.
# comoPath = kagglehub.dataset_download("tusharchauhan1898/comofod")
# comoFqPath =  ospath.join(getcwd(), comoPath, 'CoMoFoD_small_v2')
# print(listdir(comoFqPath))


Downloading from https://www.kaggle.com/api/v1/datasets/download/divg07/casia-20-image-tampering-detection-dataset?dataset_version_number=1...


 29%|██▉       | 763M/2.56G [00:11<01:37, 19.9MB/s]

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetV2B0
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Conv2D, BatchNormalization, Multiply, Input
from tensorflow.keras.models import Model
import numpy as np
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import roc_curve, auc

In [None]:
# Training configuration
IMG_SIZE = 224  # side length, in pixels, of the square images fed to the network
BATCH_SIZE = 32  # number of images processed together in one batch
EPOCHS = 20  # maximum number of training epochs (early stopping may end sooner)
def create_similarity_branch():
    """Return a Sequential stack of two conv + batch-norm stages.

    The stages use 256 and then 128 filters, each with a 3x3 kernel, 'same'
    padding and ReLU activation, and each followed by batch normalisation.
    """
    stages = []
    for filter_count in (256, 128):
        stages.append(Conv2D(filter_count, 3, padding='same', activation='relu'))
        stages.append(BatchNormalization())
    return tf.keras.Sequential(stages)


In [None]:
def create_model():
    """Build the binary forgery classifier on top of EfficientNetV2-B0.

    Architecture: ImageNet-pretrained EfficientNetV2-B0 backbone (all layers
    trainable) -> similarity branch -> 1x1 sigmoid attention map that gates the
    branch features -> global average pooling -> two dense layers with
    dropout -> single sigmoid output.

    Input convention: the model takes images of shape (IMG_SIZE, IMG_SIZE, 3)
    whose pixels are already scaled to [0, 1], which is what the data
    generators and prediction helpers of this notebook provide.

    Returns:
        An uncompiled Keras ``Model`` with one output named ``main_output``.
        The attention map is available through the ``attention_output`` layer.
    """
    input_shape = (IMG_SIZE, IMG_SIZE, 3)
    inputs = Input(shape=input_shape)

    # EfficientNetV2 embeds its own preprocessing and expects raw [0, 255]
    # pixels. The notebook feeds pixels divided by 255, so without this layer
    # the images would be scaled twice and the pretrained backbone would see
    # near-constant inputs. Multiply back to [0, 255] before the backbone.
    backbone_inputs = tf.keras.layers.Rescaling(255.0, name='undo_unit_scaling')(inputs)

    # EfficientNetV2-B0 backbone, used as a nested feature extractor.
    base_model = EfficientNetV2B0(weights='imagenet', include_top=False, input_shape=input_shape)

    # Unfreeze every backbone layer so the whole network is fine-tuned.
    for layer in base_model.layers:
        layer.trainable = True

    # Feature extraction.
    features = base_model(backbone_inputs)

    # Similarity branch intended to highlight copy-pasted regions.
    similarity_branch = create_similarity_branch()(features)

    # Attention mechanism: a single-channel sigmoid map multiplied into the
    # branch features (broadcast over the channel axis).
    attention = Conv2D(1, 1, activation='sigmoid', name='attention_output')(similarity_branch)
    attended_features = Multiply()([similarity_branch, attention])

    # Classification head.
    x = GlobalAveragePooling2D()(attended_features)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.3)(x)

    # Single main output: probability that the image is forged.
    predictions = Dense(1, activation='sigmoid', name='main_output')(x)

    model = Model(inputs=inputs, outputs=predictions)
    return model

In [None]:

def prepare_data(data_dir, classes=('Au', 'Tp')):
    """Create the training and validation image generators for a dataset folder.

    Args:
        data_dir: Directory containing one sub-folder per class.
        classes: Names of the two class sub-folders, in label order
            (index 0 = authentic, index 1 = forged). Defaults to the CASIA v2
            layout ('Au', 'Tp'); pass ('Ori', 'Faux') for the CoMoFoD layout.

    Returns:
        (train_generator, validation_generator). Both rescale pixels to
        [0, 1] and share the same 80/20 split. Only the training generator
        applies random augmentation and shuffling.
    """
    # 80% of the images are used for training, 20% for validation.
    validation_fraction = 0.2

    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True,
        validation_split=validation_fraction,
        brightness_range=[0.8, 1.2],
        zoom_range=0.1,
        fill_mode='nearest'
    )

    # Validation images must not be randomly distorted, otherwise the metrics
    # change from run to run. The same validation_split value keeps the
    # train/validation partition identical between the two generators.
    validation_datagen = ImageDataGenerator(
        rescale=1./255,
        validation_split=validation_fraction
    )

    shared_options = dict(
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        class_mode='binary',  # binary classification
        classes=list(classes)
    )

    train_generator = train_datagen.flow_from_directory(
        data_dir,
        subset='training',
        shuffle=True,
        **shared_options
    )

    # shuffle=False keeps batches in file order, so predictions made from this
    # generator line up with validation_generator.classes.
    validation_generator = validation_datagen.flow_from_directory(
        data_dir,
        subset='validation',
        shuffle=False,
        **shared_options
    )

    return train_generator, validation_generator

In [None]:
def train_model():
    """Build, compile and fit the forgery classifier on the CASIA v2 data.

    Returns:
        (model, history): the fitted Keras model and the object returned by
        ``model.fit``.
    """
    net = create_model()

    # Single-output compilation.
    adam = tf.keras.optimizers.Adam(learning_rate=0.0001)
    tracked_metrics = ['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
    net.compile(optimizer=adam, loss='binary_crossentropy', metrics=tracked_metrics)

    # CASIA v2 dataset location (module-level fqPath).
    train_batches, val_batches = prepare_data(fqPath)

    # Stop when validation accuracy stalls and restore the best weights.
    stop_early = tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy',
        patience=5,
        restore_best_weights=True,
        mode='max'
    )
    # Keep only the best model on disk.
    keep_best = tf.keras.callbacks.ModelCheckpoint(
        'best_model.keras',
        save_best_only=True,
        monitor='val_accuracy',
        mode='max'
    )
    # Halve the learning rate when validation loss plateaus.
    shrink_lr = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=3,
        mode='min'
    )

    # Errors on forged images (class 1) are weighted more than on authentic ones (class 0).
    loss_weights = {0: 1.0, 1: 1.5}

    fit_history = net.fit(
        train_batches,
        epochs=EPOCHS,
        validation_data=val_batches,
        callbacks=[stop_early, keep_best, shrink_lr],
        class_weight=loss_weights
    )

    return net, fit_history

In [None]:

# Result analysis helpers
import matplotlib.pyplot as plt

# Decision threshold
def plot_roc_curve(y_true, y_pred_proba):
    """Compute the ROC curve and return the threshold maximising TPR - FPR.

    Despite its name, this function draws no figure; it prints the AUC and the
    selected threshold.

    Args:
        y_true: Ground-truth binary labels (any shape, flattened internally).
        y_pred_proba: Predicted scores for the positive class, same length.

    Returns:
        The score threshold with the highest Youden index (TPR - FPR).
    """
    # Compute the ROC points; flatten so (N, 1) model outputs are accepted.
    fpr, tpr, thresholds = roc_curve(np.ravel(y_true), np.ravel(y_pred_proba))

    # Area under the curve.
    roc_auc = auc(fpr, tpr)

    # thresholds[0] is an artificial "predict nothing as positive" entry
    # (infinite in recent scikit-learn). It must never be returned as the
    # operating point, so it is excluded from the search when possible.
    youden_index = tpr - fpr
    if len(thresholds) > 1:
        optimal_idx = int(np.argmax(youden_index[1:])) + 1
    else:
        optimal_idx = 0
    optimal_threshold = thresholds[optimal_idx]

    print(f"AUC : {roc_auc:.2f}")
    print(f"Seuil optimal basé sur ROC : {optimal_threshold:.2f}")
    return optimal_threshold

def predict_image(model, image_path, treshold):
    """Classify a single image file as authentic or forged.

    Args:
        model: Trained Keras model exposing a layer named 'attention_output'.
        image_path: Path of the image to analyse.
        treshold: Decision threshold; scores strictly above it are reported as
            forged. (The parameter name keeps its original spelling so
            existing keyword calls still work.)

    Returns:
        dict with keys:
            'prediction'    - forgery score as a float,
            'is_fake'       - True when the score exceeds ``treshold``,
            'confidence'    - distance of the score from 0.5, rescaled to [0, 1],
            'attention_map' - 2-D array produced by the attention layer.
    """
    img = tf.keras.preprocessing.image.load_img(
        image_path, target_size=(IMG_SIZE, IMG_SIZE)
    )
    img_array = tf.keras.preprocessing.image.img_to_array(img)
    img_array = img_array / 255.0
    img_array = np.expand_dims(img_array, axis=0)

    # Model output for the single image of the batch, reduced to a scalar.
    prediction = model.predict(img_array)[0]
    score = float(prediction[0])

    # Attention map: requires an intermediate model ending at the attention layer.
    attention_model = Model(inputs=model.input,
                          outputs=model.get_layer('attention_output').output)
    # Drop the batch axis and the single channel axis.
    attention_map = attention_model.predict(img_array)[0, ..., 0]

    # Decision: use the threshold given by the caller, not a global variable.
    is_fake = bool(score > treshold)

    print(f"\nRésultat de l'analyse pour {image_path}:")
    print(f"Score de falsification: {score:.2%}")
    print(f"Verdict: {'Image FALSIFIEE' if is_fake else 'Image AUTHENTIQUE'}")

    return {
        'prediction': score,
        'is_fake': is_fake,
        'confidence': float(abs(0.5 - score) * 2),
        'attention_map': attention_map
    }


In [None]:

def evaluate_model(model, test_generator):
    """Evaluate the model on a generator and pick a decision threshold.

    Labels and predictions are collected batch by batch from the same batches,
    so they stay aligned even when the generator shuffles its samples.

    Args:
        model: Trained Keras model producing one sigmoid score per image.
        test_generator: Indexable batch generator; ``test_generator[i]`` must
            return an ``(images, labels)`` pair and ``len()`` the batch count.

    Returns:
        The optimal threshold returned by ``plot_roc_curve``.

    Raises:
        ValueError: If the generator yields no batch.
    """
    from sklearn.metrics import classification_report, confusion_matrix

    label_batches = []
    score_batches = []
    for batch_index in range(len(test_generator)):
        batch_images, batch_labels = test_generator[batch_index]
        score_batches.append(np.ravel(model.predict_on_batch(batch_images)))
        label_batches.append(np.ravel(batch_labels))

    if not label_batches:
        raise ValueError("test_generator produced no batch to evaluate")

    # Ground truth and predicted probabilities, in the same sample order.
    y_true = np.concatenate(label_batches).astype(int)
    y_pred_proba = np.concatenate(score_batches)

    # ROC analysis and optimal threshold.
    optimal_threshold = plot_roc_curve(y_true, y_pred_proba)

    # Turn probabilities into binary predictions with the optimal threshold.
    y_pred_binary = (y_pred_proba > optimal_threshold).astype(int)

    # Compute and display the metrics.
    print("\nRapport de Classification:")
    print(classification_report(y_true, y_pred_binary, target_names=['Authentique', 'Falsifiée']))

    print("\nMatrice de Confusion:")
    print(confusion_matrix(y_true, y_pred_binary))

    return optimal_threshold




In [None]:
# Training entry point: fit the model, then derive the decision threshold on the
# validation subset. `model` and `optimal_threshold` stay defined at module
# level and are used by later cells.
if __name__ == "__main__":
        # Train the model
        model, history = train_model()

        # Build the validation generator (the training generator is discarded)
        _, validation_generator = prepare_data(fqPath)
        # _, validation_generator = prepare_data('C:/Users/PC 9/OneDrive/Documents/M2/Deep_Learning/datasetComofod')

        # Evaluate the model and compute the optimal threshold
        optimal_threshold = evaluate_model(model, validation_generator)

        print(f"Seuil optimal utilisé pour les prédictions : {optimal_threshold:.2f}")


        # Disabled: test on a few individual images
        # test_images = [
        #     fqPath
        #     # CASIA v2 dataset
        #     #"C:/Users/PC 9/OneDrive/Documents/M2/Deep_Learning/dataset/Au/Au_ani_00004.jpg",
        #     #"C:/Users/PC 9/OneDrive/Documents/M2/Deep_Learning/dataset/Au/Au_ani_00001.jpg"
        #     #"C:/Users/PC 9/OneDrive/Documents/M2/Deep_Learning/dataset/Tp/Tp_D_CNN_M_N_ani00057_ani00055_11149.jpg"
        #     #"C:/Users/PC 9/OneDrive/Documents/M2/Deep_Learning/dataset/Tp/Tp_D_CNN_S_N_cha10117_nat10139_12166.jpg"
        #     #"C:/Users/PC 9/OneDrive/Documents/M2/Deep_Learning/dataset/Au/Au_ani_00015.jpg"
        #     #"C:/Users/PC 9/OneDrive/Documents/M2/Deep_Learning/dataset/Au/Au_ani_00029.jpg"
        #     #"C:/Users/PC 9/OneDrive/Documents/M2/Deep_Learning/dataset/Tp/Tp_D_CNN_M_N_ani00023_ani00024_10205.tif"
        #     #"C:/Users/PC 9/OneDrive/Documents/M2/Deep_Learning/dataset/Tp/Tp_S_NRN_S_N_txt00070_txt00070_11316.jpg"

        #     # CoMoFoD dataset
        #    #"C:/Users/PC 9/OneDrive/Documents/M2/Deep_Learning/datasetComofod/Ori/001_O.png"
        #     # "C:/Users/PC 9/OneDrive/Documents/M2/Deep_Learning/datasetComofod/Faux/001_F.png"
        # ]

        # for image_path in test_images:
        #     results = predict_image(model, image_path, optimal_threshold)


In [None]:
# Persist the trained model as "v2.keras" (relative path, so it lands in the
# current working directory). Requires `model` from the training cell.
keras.saving.save_model(model, "v2.keras")
print("Modèle sauvegardé avec succès.")
# print(listdir())

# Performance test


In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from tensorflow.keras.models import load_model
import tensorflow as tf
from skimage.feature import match_template

In [None]:
# Global configuration for the evaluation part of the notebook
MODEL_PATH = "v2.keras"  # model file loaded in the next cell
IMG_SIZE = 224  # side length, in pixels, images are resized to before CNN prediction
OUTPUT_CSV = "results_comofod.csv"  # per-image results table written by evaluate_methods
OUTPUT_GRAPH_DIR = "graphs_comofod"  # directory receiving the saved figures
os.makedirs(OUTPUT_GRAPH_DIR, exist_ok=True)
print(os.listdir())

['.config', 'graphs_comofod', 'v2.keras', 'drive', 'sample_data']


In [None]:
# Load the CNN model.
# Best-effort: on any failure the error is printed and cnn_model is set to None
# instead of raising, so code that needs the model must check for None.
try:
    cnn_model = load_model(os.path.join(os.getcwd(), MODEL_PATH))
    print(f"Modèle CNN chargé depuis {MODEL_PATH}.")
except Exception as e:
    print(f"Erreur lors du chargement du modèle CNN : {e}")
    cnn_model = None

Erreur lors du chargement du modèle CNN : Bad magic number for file header


In [None]:
# Forgery detection with the traditional (block-matching) method
def detect_forgery(image_path, rows=32, cols=32, similarity_threshold=0.85, forgery_threshold=15):
    """Flag an image as forged when many of its blocks reappear elsewhere in it.

    The grayscale image is cut into a ``rows`` x ``cols`` grid. Each block is
    correlated against the whole image with ``match_template``. Positions whose
    correlation reaches ``similarity_threshold`` are counted, except those
    overlapping the block itself.

    Args:
        image_path: Path of the image to analyse.
        rows: Number of grid rows (clamped to the image height).
        cols: Number of grid columns (clamped to the image width).
        similarity_threshold: Minimum correlation for a position to count.
        forgery_threshold: Number of matching positions from which the image
            is considered forged.

    Returns:
        True if the image is considered forged, False otherwise, or None when
        the image cannot be read.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        print(f"Erreur : Impossible de charger l'image {image_path}")
        return None

    h, w = image.shape
    # A grid finer than the image would produce empty blocks.
    rows = max(1, min(rows, h))
    cols = max(1, min(cols, w))
    block_height = h // rows
    block_width = w // cols
    suspicious_areas_count = 0

    for i in range(rows):
        y_start = i * block_height
        for j in range(cols):
            x_start = j * block_width

            block = image[y_start:y_start + block_height, x_start:x_start + block_width]
            # result[y, x] is the correlation with the block placed at top-left (y, x).
            result = match_template(image, block)
            match_ys, match_xs = np.nonzero(result >= similarity_threshold)

            # A block always matches itself (and its slightly shifted copies).
            # Positions overlapping the source block are not evidence of
            # copy-move and would otherwise flag every image.
            overlaps_source = (
                (np.abs(match_ys - y_start) < block_height)
                & (np.abs(match_xs - x_start) < block_width)
            )
            suspicious_areas_count += int(np.count_nonzero(~overlaps_source))

            # The count only grows, so the verdict is known once the threshold is reached.
            if suspicious_areas_count >= forgery_threshold:
                return True

    return suspicious_areas_count >= forgery_threshold


In [None]:
# CNN-based prediction
def predict_with_cnn(image_path):
    """Score one image with the loaded CNN.

    Returns:
        (is_forged, score): ``score`` is the model output for the image and
        ``is_forged`` is whether it exceeds 0.5.

    Raises:
        Exception: If the module-level ``cnn_model`` is not available.
    """
    if not cnn_model:
        raise Exception("Modèle CNN non chargé.")

    target = (IMG_SIZE, IMG_SIZE)
    picture = tf.keras.preprocessing.image.load_img(image_path, target_size=target)
    pixels = tf.keras.preprocessing.image.img_to_array(picture) / 255.0
    # Add the leading batch axis expected by the model.
    batch = pixels[np.newaxis, ...]

    score = cnn_model.predict(batch)[0][0]
    return score > 0.5, score

In [None]:
# Maximum number of image files evaluated in each directory
LIMIT = 10

# File extensions treated as images (compared case-insensitively)
_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff')

# Column order of the results CSV
_RESULT_COLUMNS = ["Image", "True Label", "CNN Prediction", "CNN Score", "Traditional Prediction"]


def _evaluate_directory(directory, true_label, label_name, progress_name):
    """Run both detectors on up to LIMIT images of one directory.

    Returns a list of ``(row, true_label, cnn_flag, traditional_flag)`` tuples,
    one per image for which BOTH methods produced a verdict. Images on which
    a method fails are reported and skipped so the outputs stay aligned.
    """
    records = []
    attempted = 0

    # Sorted for a reproducible selection; os.listdir order is arbitrary.
    for file in sorted(os.listdir(directory)):
        if attempted >= LIMIT:
            break
        if not file.lower().endswith(_IMAGE_EXTENSIONS):
            continue

        attempted += 1
        image_path = os.path.join(directory, file)

        # CNN method
        try:
            cnn_forged, cnn_score = predict_with_cnn(image_path)
        except Exception as e:
            print(f"Error in CNN for {image_path}: {e}")
            continue

        # Traditional method (returns None when the image cannot be read)
        traditional_forged = detect_forgery(image_path)
        if traditional_forged is None:
            continue

        row = {
            "Image": image_path,
            "True Label": label_name,
            "CNN Prediction": "Falsifiée" if cnn_forged else "Authentique",
            "CNN Score": cnn_score,
            "Traditional Prediction": "Falsifiée" if traditional_forged else "Authentique"
        }
        print(f"--> Saving {progress_name} results...")
        records.append((row, true_label, int(cnn_forged), int(traditional_forged)))

    return records


def evaluate_methods(authentic_dir, forgery_dir):
    """Compare the CNN and the traditional detector on two image folders.

    At most LIMIT image files are taken from each folder. Per-image results
    are written to OUTPUT_CSV (one row per evaluated image, in the same order
    as the returned lists).

    Args:
        authentic_dir: Folder of authentic images (label 0).
        forgery_dir: Folder of forged images (label 1).

    Returns:
        (true_labels, cnn_predictions, traditional_predictions): three lists of
        0/1 integers of equal length.
    """
    print(f"Authenticate files: {len(os.listdir(authentic_dir))}")
    print(f"Forgery files: {len(os.listdir(forgery_dir))}")

    records = (
        _evaluate_directory(authentic_dir, 0, "Authentique", "authentic")
        + _evaluate_directory(forgery_dir, 1, "Falsifiée", "forgery")
    )

    results = [record[0] for record in records]
    true_labels = [record[1] for record in records]
    cnn_predictions = [record[2] for record in records]
    traditional_predictions = [record[3] for record in records]

    # Save results to CSV; explicit columns keep the header even with no rows.
    results_df = pd.DataFrame(results, columns=_RESULT_COLUMNS)
    results_df.to_csv(OUTPUT_CSV, index=False)
    print(f"Results saved to {OUTPUT_CSV}.")

    # Return data for metrics
    return true_labels, cnn_predictions, traditional_predictions


In [None]:
# Figure generation
def _save_confusion_matrix(true_labels, predictions, title, filename):
    """Draw one 2x2 confusion matrix and save it under OUTPUT_GRAPH_DIR."""
    # labels=[0, 1] keeps the matrix 2x2 even when a class is absent.
    matrix = confusion_matrix(true_labels, predictions, labels=[0, 1])

    fig, ax = plt.subplots()
    image = ax.matshow(matrix, cmap="Blues")
    ax.set_title(title)
    fig.colorbar(image, ax=ax)
    ax.set_ylabel("Vraies classes")
    ax.set_xlabel("Prédictions")
    fig.savefig(os.path.join(OUTPUT_GRAPH_DIR, filename))


def generate_graphs(true_labels, cnn_predictions, cnn_scores, traditional_predictions):
    """Save the CNN ROC curve and both confusion matrices as PNG files.

    Files written to OUTPUT_GRAPH_DIR: ``roc_cnn.png``,
    ``confusion_matrix_cnn.png`` and ``confusion_matrix_traditional.png``.
    Figures are left open so the notebook still displays them inline.

    Args:
        true_labels: Ground-truth 0/1 labels.
        cnn_predictions: 0/1 CNN verdicts.
        cnn_scores: Continuous CNN scores used for the ROC curve.
        traditional_predictions: 0/1 verdicts of the traditional method.
    """
    # ROC curve for the CNN
    fpr, tpr, _ = roc_curve(true_labels, cnn_scores)
    roc_auc = auc(fpr, tpr)

    roc_fig, roc_ax = plt.subplots()
    roc_ax.plot(fpr, tpr, color="darkorange", lw=2, label=f"Courbe ROC (AUC = {roc_auc:.2f})")
    roc_ax.plot([0, 1], [0, 1], color="navy", linestyle="--")
    roc_ax.set_xlabel("Taux de faux positifs (FPR)")
    roc_ax.set_ylabel("Taux de vrais positifs (TPR)")
    roc_ax.set_title("Courbe ROC - Méthode CNN")
    roc_ax.legend(loc="lower right")
    roc_fig.savefig(os.path.join(OUTPUT_GRAPH_DIR, "roc_cnn.png"))
    print(f"Graphique ROC sauvegardé dans {OUTPUT_GRAPH_DIR}/roc_cnn.png.")

    # Confusion matrices, one figure each (no stray empty figure is created).
    _save_confusion_matrix(
        true_labels, cnn_predictions,
        "Matrice de confusion - Méthode CNN", "confusion_matrix_cnn.png"
    )
    _save_confusion_matrix(
        true_labels, traditional_predictions,
        "Matrice de confusion - Méthode Traditionnelle", "confusion_matrix_traditional.png"
    )
    print("Matrices de confusion sauvegardées.")


In [None]:
import kagglehub
import os
from os import path as ospath, listdir, getcwd
# Download the CoMoFoD dataset through kagglehub; the returned value is used as
# the local directory of the download.
comoPath = kagglehub.dataset_download("tusharchauhan1898/comofod")
# Folder holding the image files, listed below and reorganised by later cells.
comoFqPath =  ospath.join(getcwd(), comoPath, 'CoMoFoD_small_v2')
print(listdir(comoFqPath))

['015_O_JC6.jpg', '170_F_BC3.png', '149_F_JC2.jpg', '135_O_CR3.png', '044_O_NA3.png', '078_O_CR2.png', '143_F_NA2.png', '127_F_JC2.jpg', '086_O_CR3.png', '133_O_JC3.jpg', '052_F_BC1.png', '165_F_JC4.jpg', '165_F_JC6.jpg', '181_O_JC9.jpg', '107_O_CA3.png', '171_O_JC4.jpg', '040_O_NA1.png', '146_O_IB1.png', '180_O_NA2.png', '128_O_CR1.png', '115_F_NA3.png', '124_O_JC1.jpg', '151_F_IB1.png', '138_F_BC1.png', '016_F_CR2.png', '059_O_IB1.png', '148_O_JC7.jpg', '158_F_BC2.png', '159_O_BC3.png', '013_O_JC3.jpg', '138_F_BC2.png', '027_F_JC4.jpg', '005_F_BC2.png', '038_O_JC8.jpg', '096_O_CR3.png', '187_F_JC9.jpg', '172_O_BC1.png', '027_O_JC6.jpg', '187_F_JC5.jpg', '183_F_JC8.jpg', '050_F_JC8.jpg', '114_F_JC5.jpg', '123_F_CR2.png', '040_O_JC7.jpg', '156_O_IB2.png', '162_B.png', '169_O_JC6.jpg', '032_F_NA3.png', '037_F_JC3.jpg', '076_O_JC7.jpg', '142_F_NA3.png', '052_F_NA1.png', '200_O_JC1.jpg', '167_F_IB2.png', '098_F_IB1.png', '191_O_NA1.png', '082_O_CR2.png', '123_O_JC9.jpg', '179_F_CR3.png', 

In [None]:
import shutil

def organize_comofod_files(file_list, dataset_dir):
    """
    Sort CoMoFoD files into two sub-folders of ``dataset_dir``: Ori/ and Faux/.

    The class is read from the underscore-separated parts of the file name
    (extension removed): a part equal to "F" means forged, a part equal to "O"
    means original. Other files (e.g. ``107_M.png``) are left in place. Files
    are moved, not copied.

    Args:
        file_list (list): Names of the files to organise.
        dataset_dir (str): Directory containing the files.

    Returns:
        None
    """
    # Create the class directories.
    ori_dir = os.path.join(dataset_dir, "Ori")
    faux_dir = os.path.join(dataset_dir, "Faux")
    os.makedirs(ori_dir, exist_ok=True)
    os.makedirs(faux_dir, exist_ok=True)

    for file_name in file_list:
        src_path = os.path.join(dataset_dir, file_name)

        # Skip entries that are not existing regular files.
        if not os.path.isfile(src_path):
            print(f"Fichier introuvable : {src_path}")
            continue

        # Compare whole name parts rather than substrings, so that a name such
        # as "004_O_FINAL.png" is not mistaken for a forged image.
        name_parts = os.path.splitext(file_name)[0].split("_")
        if "F" in name_parts:
            class_name, class_dir = "Faux", faux_dir
        elif "O" in name_parts:
            class_name, class_dir = "Ori", ori_dir
        else:
            print(f"Fichier ignoré (classe inconnue) : {file_name}")
            continue

        # Move the file and report the class that was actually used.
        shutil.move(src_path, os.path.join(class_dir, file_name))
        print(f"Déplacé : {file_name} -> {class_name}")

In [None]:
# Sort the downloaded CoMoFoD files into the Ori/ and Faux/ sub-folders.
# The files are moved (not copied) inside comoFqPath.
organize_comofod_files(listdir(comoFqPath), comoFqPath)

[1;30;43mLe flux de sortie a été tronqué et ne contient que les 5000 dernières lignes.[0m
Déplacé : 155_O_JC2.jpg -> Ori
Déplacé : 089_F_JC1.jpg -> Faux
Déplacé : 116_F_JC7.jpg -> Faux
Déplacé : 109_F_JC9.jpg -> Faux
Déplacé : 019_F_JC8.jpg -> Faux
Déplacé : 014_O_JC1.jpg -> Ori
Déplacé : 067_F_IB1.png -> Faux
Déplacé : 018_F_JC4.jpg -> Faux
Déplacé : 032_O_JC3.jpg -> Ori
Déplacé : 190_F_IB2.png -> Faux
Déplacé : 035_O_CA2.png -> Ori
Déplacé : 139_F_JC4.jpg -> Faux
Déplacé : 029_F_CA1.png -> Faux
Déplacé : 171_F_NA3.png -> Faux
Fichier ignoré (classe inconnue) : 107_M.png
Déplacé : 159_F_IB1.png -> Faux
Déplacé : 140_O_IB1.png -> Ori
Déplacé : 106_F_JC3.jpg -> Faux
Déplacé : 110_F_NA1.png -> Faux
Déplacé : 138_F_CA2.png -> Faux
Déplacé : 053_O_IB1.png -> Ori
Déplacé : 111_O_IB2.png -> Ori
Déplacé : 136_O_JC1.jpg -> Ori
Déplacé : 085_O_NA2.png -> Ori
Déplacé : 198_F.png -> Faux
Déplacé : 029_O_JC6.jpg -> Ori
Déplacé : 040_O_IB2.png -> Ori
Déplacé : 191_F_BC3.png -> Faux
Déplacé : 072_

In [None]:
# Run the evaluation
if __name__ == "__main__":
    # CoMoFoD class folders
    authentic_dir = comoFqPath+"/Ori"  # change this path if needed
    forgery_dir = comoFqPath+"/Faux"  # change this path if needed

    # Evaluate both methods
    true_labels, cnn_predictions, traditional_predictions = evaluate_methods(authentic_dir, forgery_dir)

    if true_labels:
        # evaluate_methods returns only the 0/1 verdicts. The continuous CNN
        # scores needed for a meaningful ROC curve are in the CSV it wrote,
        # one row per evaluated image in the same order.
        cnn_scores = pd.read_csv(OUTPUT_CSV)["CNN Score"].fillna(0.0).tolist()
        if len(cnn_scores) != len(true_labels):
            print("Warning: CSV scores do not match the labels; falling back to binary predictions for the ROC curve.")
            cnn_scores = [float(prediction) for prediction in cnn_predictions]

        # Generate the figures
        generate_graphs(true_labels, cnn_predictions, cnn_scores, traditional_predictions)
    else:
        print("Warning: no image could be evaluated; no graph generated.")

    print("Évaluation terminée.")

Authenticate files: 5000
Forgery files: 5000
Error in CNN for /root/.cache/kagglehub/datasets/tusharchauhan1898/comofod/versions/1/CoMoFoD_small_v2/Ori/015_O_JC6.jpg: Modèle CNN non chargé.


KeyboardInterrupt: 