In [None]:
"""
Script pour g√©n√©rer des morphings sur TOUTE la base de donn√©es LFW
Cr√©e des morphings entre toutes les paires de personnes possibles
"""

import numpy as np
import cv2
import dlib
import matplotlib.pyplot as plt
import os
from pathlib import Path
import urllib.request
import bz2
from sklearn.datasets import fetch_lfw_people
from itertools import combinations
from tqdm import tqdm
import time

# ------------------ Configuration ------------------
LOCAL_DATA_DIR = Path("./dlib_models")
LOCAL_DATA_DIR.mkdir(exist_ok=True)
PREDICTOR_PATH = LOCAL_DATA_DIR / "shape_predictor_68_face_landmarks.dat"

# Configuration du dataset
MIN_FACES_PER_PERSON = 30  # Minimum d'images par personne
RESIZE_FACTOR = 0.5        # Facteur de redimensionnement
SIZE = 128                 # Taille finale des images

# Configuration de g√©n√©ration
OUTPUT_DIR = Path("./morphing_results")
OUTPUT_DIR.mkdir(exist_ok=True)

# Options de g√©n√©ration
MODE = "all"  # "all" = toutes les combinaisons, "sample" = √©chantillon, "per_person" = 1 morphing par personne
NUM_SAMPLES = 100  # Si MODE="sample", nombre de morphings √† g√©n√©rer
ALPHA_VALUES = [0.5]  # Liste des alphas √† tester (ex: [0.3, 0.5, 0.7])
SAVE_INDIVIDUAL = True  # Sauvegarder chaque morphing individuellement
CREATE_GRID = True  # Cr√©er une grille de morphings
GRID_SIZE = (5, 5)  # Taille de la grille (lignes, colonnes)

print("="*70)
print("üé≠ G√âN√âRATION DE MORPHINGS SUR TOUTE LA BASE DE DONN√âES")
print("="*70)
print(f"\n‚öôÔ∏è  Configuration:")
print(f"   - Mode: {MODE}")
if MODE == "sample":
    print(f"   - Nombre d'√©chantillons: {NUM_SAMPLES}")
print(f"   - Alpha values: {ALPHA_VALUES}")
print(f"   - Taille des images: {SIZE}x{SIZE}")
print(f"   - Dossier de sortie: {OUTPUT_DIR}")
print(f"   - Sauvegarder individuellement: {SAVE_INDIVIDUAL}")
print(f"   - Cr√©er grille: {CREATE_GRID}")

# ------------------ T√©l√©charger le mod√®le Dlib ------------------
def download_dlib_predictor():
    """T√©l√©charge le fichier shape_predictor_68_face_landmarks.dat si n√©cessaire"""
    if PREDICTOR_PATH.exists():
        print(f"\n‚úì Mod√®le Dlib d√©j√† pr√©sent")
        return
    
    print("\nüì• T√©l√©chargement du mod√®le Dlib...")
    url = "http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2"
    compressed_file = LOCAL_DATA_DIR / "shape_predictor_68_face_landmarks.dat.bz2"
    
    try:
        urllib.request.urlretrieve(url, compressed_file)
        print("   D√©compression...")
        
        with bz2.BZ2File(compressed_file, 'rb') as f_in:
            with open(PREDICTOR_PATH, 'wb') as f_out:
                f_out.write(f_in.read())
        
        compressed_file.unlink()
        print("   ‚úì Mod√®le Dlib pr√™t")
        
    except Exception as e:
        raise RuntimeError(f"‚ùå Erreur lors du t√©l√©chargement du mod√®le Dlib : {e}")

# ------------------ Charger LFW Dataset ------------------
def load_lfw_compact():
    """Charge le dataset LFW"""
    print("\nüì• Chargement du dataset LFW...")
    
    try:
        lfw_people = fetch_lfw_people(
            min_faces_per_person=MIN_FACES_PER_PERSON,
            resize=RESIZE_FACTOR,
            color=False
        )
        
        images = lfw_people.images
        labels = lfw_people.target
        target_names = lfw_people.target_names
        
        n_samples, h, w = images.shape
        n_classes = len(target_names)
        
        print(f"‚úÖ Dataset charg√©!")
        print(f"   - Total images: {n_samples}")
        print(f"   - Personnes: {n_classes}")
        print(f"   - Taille: {h}x{w}")
        
        return images, labels, target_names
        
    except Exception as e:
        raise RuntimeError(f"‚ùå Erreur: {e}")

# T√©l√©charger le mod√®le Dlib
download_dlib_predictor()

# Charger Dlib
try:
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(str(PREDICTOR_PATH))
    print("‚úì Mod√®le Dlib charg√©")
except Exception as e:
    raise RuntimeError(f"‚ùå Impossible de charger Dlib: {e}")

# Charger le dataset
images, labels, target_names = load_lfw_compact()

# ------------------ Fonctions de morphing ------------------

def get_landmarks(img_gray, detector, predictor, upsample_times=0):
    """D√©tecte les landmarks faciaux"""
    dets = detector(img_gray, upsample_times)
    if len(dets) == 0:
        return None
    shape = predictor(img_gray, dets[0])
    pts = np.zeros((68, 2), dtype=np.int32)
    for i in range(68):
        pts[i] = (shape.part(i).x, shape.part(i).y)
    return pts

def add_corner_points(points, w, h):
    corners = np.array([
        [0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1],
        [w // 2, 0], [w - 1, h // 2], [w // 2, h - 1], [0, h // 2]
    ], dtype=np.int32)
    return np.concatenate([points, corners], axis=0)

def clamp_points(points, w, h):
    pts = np.array(points, dtype=np.float32)
    pts[:, 0] = np.clip(pts[:, 0], 0, w - 1)
    pts[:, 1] = np.clip(pts[:, 1], 0, h - 1)
    return pts

def find_point_index(points, pt, tol=3.0):
    pts = np.asarray(points, dtype=np.float32)
    dists = np.linalg.norm(pts - np.asarray(pt, dtype=np.float32), axis=1)
    idx = int(np.argmin(dists))
    if dists[idx] <= tol:
        return idx
    return None

def triangle_completely_inside(t, w, h):
    for (x, y) in t:
        if x < 0 or x >= w or y < 0 or y >= h:
            return False
    return True

def apply_affine_transform(src, src_tri, dst_tri, size):
    warp_mat = cv2.getAffineTransform(np.float32(src_tri), np.float32(dst_tri))
    dst = cv2.warpAffine(src, warp_mat, (int(size[0]), int(size[1])),
                         None, flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101)
    return dst

def morph_triangle(img1, img2, img_morphed, t1, t2, t_morphed, alpha):
    r1 = cv2.boundingRect(np.float32([t1]))
    r2 = cv2.boundingRect(np.float32([t2]))
    r = cv2.boundingRect(np.float32([t_morphed]))

    if r1[2] <= 0 or r1[3] <= 0 or r2[2] <= 0 or r2[3] <= 0 or r[2] <= 0 or r[3] <= 0:
        return

    t1_rect = [(t1[i][0] - r1[0], t1[i][1] - r1[1]) for i in range(3)]
    t2_rect = [(t2[i][0] - r2[0], t2[i][1] - r2[1]) for i in range(3)]
    t_rect = [(t_morphed[i][0] - r[0], t_morphed[i][1] - r[1]) for i in range(3)]

    img1_rect = img1[r1[1]:r1[1]+r1[3], r1[0]:r1[0]+r1[2]]
    img2_rect = img2[r2[1]:r2[1]+r2[3], r2[0]:r2[0]+r2[2]]

    if img1_rect.size == 0 or img2_rect.size == 0:
        return

    size_rect = (r[2], r[3])

    warp_img1 = apply_affine_transform(img1_rect, t1_rect, t_rect, size_rect)
    warp_img2 = apply_affine_transform(img2_rect, t2_rect, t_rect, size_rect)

    img_rect = (1.0 - alpha) * warp_img1 + alpha * warp_img2

    mask = np.zeros((r[3], r[2]), dtype=np.float32)
    cv2.fillConvexPoly(mask, np.int32(t_rect), 1.0, 16, 0)

    y, x, w_rect, h_rect = r[1], r[0], r[2], r[3]
    img_morphed[y:y+h_rect, x:x+w_rect] = img_morphed[y:y+h_rect, x:x+w_rect] * (1 - mask[:, :, None]) + img_rect * mask[:, :, None]

def prepare_points_for_image(img_gray, detector, predictor, w, h):
    pts = get_landmarks(img_gray, detector, predictor, upsample_times=0)
    if pts is None:
        grid_x = np.tile(np.linspace(w*0.25, w*0.75, 17), (4,))
        grid_y = np.repeat(np.linspace(h*0.25, h*0.75, 4), 17)
        grid = np.vstack([grid_x[:68], grid_y[:68]]).T.astype(np.int32)
        pts = grid
    pts = clamp_points(pts, w, h)
    pts = add_corner_points(pts.astype(np.int32), w, h)
    return pts.astype(np.float32)

def morph_faces(imgA, imgB, alpha=0.5):
    """Morphe deux visages"""
    # Redimensionner
    imgA_resized = cv2.resize(imgA, (SIZE, SIZE), interpolation=cv2.INTER_CUBIC)
    imgB_resized = cv2.resize(imgB, (SIZE, SIZE), interpolation=cv2.INTER_CUBIC)
    
    # Convertir en uint8
    if imgA_resized.dtype != np.uint8:
        imgA_resized = (imgA_resized * 255).astype(np.uint8)
    if imgB_resized.dtype != np.uint8:
        imgB_resized = (imgB_resized * 255).astype(np.uint8)
    
    # Convertir en couleur
    imgA_color = cv2.cvtColor(imgA_resized, cv2.COLOR_GRAY2BGR).astype(np.float32)
    imgB_color = cv2.cvtColor(imgB_resized, cv2.COLOR_GRAY2BGR).astype(np.float32)

    # Pr√©parer les points
    ptsA = prepare_points_for_image(imgA_resized, detector, predictor, SIZE, SIZE)
    ptsB = prepare_points_for_image(imgB_resized, detector, predictor, SIZE, SIZE)

    # Points morph√©s
    points_morphed = (1.0 - alpha) * ptsA + alpha * ptsB
    points_morphed = clamp_points(points_morphed, SIZE, SIZE)

    # Triangulation de Delaunay
    rect = (0, 0, SIZE, SIZE)
    subdiv = cv2.Subdiv2D(rect)

    for p in points_morphed:
        x, y = float(p[0]), float(p[1])
        if 0 <= x < SIZE and 0 <= y < SIZE:
            subdiv.insert((x, y))

    triangle_list = subdiv.getTriangleList()

    tri_indices = []
    for t in triangle_list:
        tri_pts = [(t[0], t[1]), (t[2], t[3]), (t[4], t[5])]
        inds = []
        valid = True
        for p in tri_pts:
            idx = find_point_index(points_morphed, p, tol=5.0)
            if idx is None:
                valid = False
                break
            inds.append(idx)
        if valid and len(set(inds)) == 3:
            tri_indices.append(tuple(inds))

    tri_indices = list(set(tri_indices))

    # Morphing
    img_morphed = np.zeros_like(imgA_color, dtype=np.float32)

    for tri in tri_indices:
        i1, i2, i3 = tri
        tA = [ptsA[i1], ptsA[i2], ptsA[i3]]
        tB = [ptsB[i1], ptsB[i2], ptsB[i3]]
        tM = [points_morphed[i1], points_morphed[i2], points_morphed[i3]]

        if not (triangle_completely_inside(tA, SIZE, SIZE) and 
                triangle_completely_inside(tB, SIZE, SIZE) and 
                triangle_completely_inside(tM, SIZE, SIZE)):
            continue

        morph_triangle(imgA_color, imgB_color, img_morphed, tA, tB, tM, alpha)

    return np.clip(img_morphed, 0, 255).astype(np.uint8)

# ------------------ G√©n√©ration des morphings ------------------

def generate_all_morphings():
    """G√©n√®re tous les morphings possibles"""
    unique_labels = np.unique(labels)
    n_people = len(unique_labels)
    
    # Calculer le nombre total de combinaisons
    if MODE == "all":
        total_combinations = n_people * (n_people - 1) // 2
        print(f"\nüî¢ G√©n√©ration de TOUS les morphings")
        print(f"   - Nombre de personnes: {n_people}")
        print(f"   - Combinaisons possibles: {total_combinations}")
        print(f"   - Alpha values: {len(ALPHA_VALUES)}")
        print(f"   - Total morphings √† g√©n√©rer: {total_combinations * len(ALPHA_VALUES)}")
        
        if total_combinations > 10000:
            print(f"\n‚ö†Ô∏è  ATTENTION: Cela va g√©n√©rer {total_combinations * len(ALPHA_VALUES)} images!")
            print(f"   Cela peut prendre plusieurs heures...")
            response = input("   Continuer? (o/n): ")
            if response.lower() not in ['o', 'oui', 'y', 'yes']:
                print("   Annul√©.")
                return
        
        person_pairs = list(combinations(unique_labels, 2))
        
    elif MODE == "sample":
        total_combinations = min(NUM_SAMPLES, n_people * (n_people - 1) // 2)
        print(f"\nüî¢ G√©n√©ration d'un √©chantillon de morphings")
        print(f"   - Nombre de personnes: {n_people}")
        print(f"   - Morphings √† g√©n√©rer: {total_combinations * len(ALPHA_VALUES)}")
        
        all_pairs = list(combinations(unique_labels, 2))
        person_pairs = [all_pairs[i] for i in np.random.choice(len(all_pairs), total_combinations, replace=False)]
        
    elif MODE == "per_person":
        total_combinations = n_people
        print(f"\nüî¢ G√©n√©ration d'un morphing par personne")
        print(f"   - Nombre de personnes: {n_people}")
        print(f"   - Total morphings: {total_combinations * len(ALPHA_VALUES)}")
        
        person_pairs = []
        for i, person_a in enumerate(unique_labels):
            person_b = unique_labels[(i + 1) % n_people]
            person_pairs.append((person_a, person_b))
    
    print(f"\nüöÄ D√©but de la g√©n√©ration...")
    
    morphed_images = []
    metadata = []
    
    start_time = time.time()
    
    # G√©n√©rer les morphings avec barre de progression
    for idx, (person_a, person_b) in enumerate(tqdm(person_pairs, desc="Morphing")):
        # R√©cup√©rer les images
        imgs_a = images[labels == person_a]
        imgs_b = images[labels == person_b]
        
        # S√©lectionner une image al√©atoire pour chaque personne
        imgA = imgs_a[np.random.randint(len(imgs_a))]
        imgB = imgs_b[np.random.randint(len(imgs_b))]
        
        name_a = target_names[person_a]
        name_b = target_names[person_b]
        
        # G√©n√©rer pour chaque alpha
        for alpha in ALPHA_VALUES:
            try:
                img_morphed = morph_faces(imgA, imgB, alpha=alpha)
                
                # Sauvegarder individuellement si demand√©
                if SAVE_INDIVIDUAL:
                    filename = f"morph_{idx:04d}_{name_a[:10]}_{name_b[:10]}_alpha{alpha:.2f}.png"
                    filepath = OUTPUT_DIR / filename
                    cv2.imwrite(str(filepath), img_morphed)
                
                # Ajouter √† la liste pour la grille
                morphed_images.append(img_morphed)
                metadata.append({
                    'person_a': name_a,
                    'person_b': name_b,
                    'alpha': alpha,
                    'index': idx
                })
                
            except Exception as e:
                print(f"\n‚ö†Ô∏è  Erreur sur {name_a} + {name_b}: {e}")
                continue
    
    elapsed_time = time.time() - start_time
    
    print(f"\n‚úÖ G√©n√©ration termin√©e!")
    print(f"   - Temps √©coul√©: {elapsed_time:.1f} secondes ({elapsed_time/60:.1f} minutes)")
    print(f"   - Images g√©n√©r√©es: {len(morphed_images)}")
    print(f"   - Vitesse: {len(morphed_images)/elapsed_time:.2f} images/seconde")
    
    # Cr√©er une grille si demand√©
    if CREATE_GRID and len(morphed_images) > 0:
        print(f"\nüìä Cr√©ation de la grille...")
        create_grid_visualization(morphed_images, metadata)
    
    print(f"\nüíæ Tous les fichiers sont dans: {OUTPUT_DIR}")

def create_grid_visualization(morphed_images, metadata):
    """Cr√©e une grille de visualisation"""
    rows, cols = GRID_SIZE
    n_images = min(len(morphed_images), rows * cols)
    
    fig, axes = plt.subplots(rows, cols, figsize=(cols*3, rows*3))
    axes = axes.flatten() if rows * cols > 1 else [axes]
    
    for i in range(rows * cols):
        if i < n_images:
            img = morphed_images[i]
            meta = metadata[i]
            axes[i].imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            title = f"{meta['person_a'][:10]}\n+\n{meta['person_b'][:10]}"
            axes[i].set_title(title, fontsize=8)
        axes[i].axis('off')
    
    plt.tight_layout()
    grid_file = OUTPUT_DIR / f"morphing_grid_{rows}x{cols}.png"
    plt.savefig(grid_file, dpi=150, bbox_inches='tight')
    print(f"   ‚úì Grille sauvegard√©e: {grid_file}")
    plt.close()

# ------------------ Programme Principal ------------------
def main():
    # D√©placer la d√©claration global au d√©but de la fonction
    global MODE, NUM_SAMPLES, ALPHA_VALUES
    
    print(f"\n{'='*70}")
    print(f"‚öôÔ∏è  PARAM√àTRES")
    print(f"{'='*70}")
    print(f"\nVoulez-vous modifier les param√®tres? (d√©faut: non)")
    print(f"Configuration actuelle:")
    print(f"  - Mode: {MODE}")
    print(f"  - Alpha values: {ALPHA_VALUES}")
    if MODE == "sample":
        print(f"  - Nombre d'√©chantillons: {NUM_SAMPLES}")
    
    response = input("\nModifier? (o/N): ")
    
    if response.lower() in ['o', 'oui', 'y', 'yes']:
        print("\nMode de g√©n√©ration:")
        print("  1. all - TOUTES les combinaisons (peut √™tre tr√®s long!)")
        print("  2. sample - Un √©chantillon al√©atoire")
        print("  3. per_person - Un morphing par personne")
        
        mode_choice = input("Choix (1/2/3): ").strip()
        if mode_choice == "1":
            MODE = "all"
        elif mode_choice == "2":
            MODE = "sample"
            NUM_SAMPLES = int(input("Nombre d'√©chantillons (ex: 100): "))
        elif mode_choice == "3":
            MODE = "per_person"
        
        alpha_input = input("Alpha values (ex: 0.3,0.5,0.7): ").strip()
        if alpha_input:
            ALPHA_VALUES = [float(x) for x in alpha_input.split(',')]
    
    # G√©n√©rer les morphings
    generate_all_morphings()
    
    print(f"\n{'='*70}")
    print(f"‚úÖ TERMIN√â!")
    print(f"{'='*70}")
    print(f"\nüìÇ Vos morphings sont dans: {OUTPUT_DIR}")
    print(f"\nüí° Astuce: Utilisez MODE='sample' pour des tests rapides!")

if __name__ == "__main__":
    main()

üé≠ G√âN√âRATION DE MORPHINGS SUR TOUTE LA BASE DE DONN√âES

‚öôÔ∏è  Configuration:
   - Mode: all
   - Alpha values: [0.5]
   - Taille des images: 128x128
   - Dossier de sortie: morphing_results
   - Sauvegarder individuellement: True
   - Cr√©er grille: True

‚úì Mod√®le Dlib d√©j√† pr√©sent
‚úì Mod√®le Dlib charg√©

üì• Chargement du dataset LFW...
‚úÖ Dataset charg√©!
   - Total images: 2370
   - Personnes: 34
   - Taille: 62x47

‚öôÔ∏è  PARAM√àTRES

Voulez-vous modifier les param√®tres? (d√©faut: non)
Configuration actuelle:
  - Mode: all
  - Alpha values: [0.5]

Mode de g√©n√©ration:
  1. all - TOUTES les combinaisons (peut √™tre tr√®s long!)
  2. sample - Un √©chantillon al√©atoire
  3. per_person - Un morphing par personne

üî¢ G√©n√©ration de TOUS les morphings
   - Nombre de personnes: 34
   - Combinaisons possibles: 561
   - Alpha values: 1
   - Total morphings √† g√©n√©rer: 561

üöÄ D√©but de la g√©n√©ration...


Morphing: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 561/561 [00:50<00:00, 11.01it/s]



‚úÖ G√©n√©ration termin√©e!
   - Temps √©coul√©: 51.0 secondes (0.8 minutes)
   - Images g√©n√©r√©es: 561
   - Vitesse: 11.01 images/seconde

üìä Cr√©ation de la grille...
   ‚úì Grille sauvegard√©e: morphing_results\morphing_grid_5x5.png

üíæ Tous les fichiers sont dans: morphing_results

‚úÖ TERMIN√â!

üìÇ Vos morphings sont dans: morphing_results

üí° Astuce: Utilisez MODE='sample' pour des tests rapides!


In [4]:
import os
import shutil
from pathlib import Path
import numpy as np

# ------------------ Organisation du Dataset ------------------
def create_training_dataset():
    """
    Organise les donn√©es pour l'entra√Ænement du mod√®le
    Structure requise:
    ./dataset/
    ‚îú‚îÄ‚îÄ real/          # Images r√©elles
    ‚îî‚îÄ‚îÄ morph/         # Images morph√©es
    """
    
    print("üìÅ Cr√©ation de la structure du dataset...")
    
    # Cr√©er la structure de dossiers
    dataset_dir = Path("./dataset")
    real_dir = dataset_dir / "real"
    morph_dir = dataset_dir / "morph"
    
    # Supprimer et recr√©er les dossiers
    if dataset_dir.exists():
        shutil.rmtree(dataset_dir)
    
    dataset_dir.mkdir(exist_ok=True)
    real_dir.mkdir(exist_ok=True)
    morph_dir.mkdir(exist_ok=True)
    
    # 1. Copier les images morph√©es
    morphing_results = Path("./morphing_results")
    if morphing_results.exists():
        morph_files = list(morphing_results.glob("*.png"))
        print(f"   - Trouv√© {len(morph_files)} images morph√©es")
        
        for i, morph_file in enumerate(morph_files):
            dest = morph_dir / f"morph_{i:04d}.png"
            shutil.copy2(morph_file, dest)
    else:
        print("   ‚ö†Ô∏è  Dossier morphing_results introuvable!")
        print("   Ex√©cutez d'abord la g√©n√©ration de morphings")
        return False
    
    # 2. Cr√©er des images "r√©elles" √† partir du dataset LFW
    print("   - G√©n√©ration d'images r√©elles...")
    
    # Utiliser quelques images du dataset LFW comme "vraies" images
    num_real_images = len(morph_files)  # M√™me nombre que les morphings
    selected_indices = np.random.choice(len(images), num_real_images, replace=False)
    
    for i, idx in enumerate(selected_indices):
        # Redimensionner et sauvegarder l'image originale
        img_original = images[idx]
        img_resized = cv2.resize(img_original, (SIZE, SIZE))
        
        # Convertir en uint8 si n√©cessaire
        if img_resized.dtype != np.uint8:
            img_resized = (img_resized * 255).astype(np.uint8)
        
        # Convertir en couleur pour √™tre coh√©rent avec les morphings
        img_color = cv2.cvtColor(img_resized, cv2.COLOR_GRAY2BGR)
        
        # Sauvegarder
        dest = real_dir / f"real_{i:04d}.png"
        cv2.imwrite(str(dest), img_color)
    
    print(f"‚úÖ Dataset cr√©√©!")
    print(f"   - Images r√©elles: {len(list(real_dir.glob('*.png')))}")
    print(f"   - Images morph√©es: {len(list(morph_dir.glob('*.png')))}")
    print(f"   - Total: {len(list(real_dir.glob('*.png'))) + len(list(morph_dir.glob('*.png')))}")
    
    return True

# Cr√©er le dataset
success = create_training_dataset()

if success:
    print("\nüéØ Dataset pr√™t pour l'entra√Ænement!")
else:
    print("\n‚ùå Probl√®me lors de la cr√©ation du dataset")
    print("   Assurez-vous d'avoir d'abord ex√©cut√© la g√©n√©ration de morphings")

üìÅ Cr√©ation de la structure du dataset...
   - Trouv√© 562 images morph√©es
   - G√©n√©ration d'images r√©elles...
‚úÖ Dataset cr√©√©!
   - Images r√©elles: 562
   - Images morph√©es: 562
   - Total: 1124

üéØ Dataset pr√™t pour l'entra√Ænement!


In [5]:
# ==================================================================================
# PROJET: Face Blending et Attaques par Inf√©rence d'Appartenance (MIA)
# Bas√© sur: Ghorbel et al. (2024) & Shokri et al. (2017)
# ==================================================================================

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, callbacks
from tensorflow.keras.applications import ResNet50, InceptionV3, MobileNetV2
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import pandas as pd
from pathlib import Path
import cv2
import os
from tqdm import tqdm
import json
import pickle
from datetime import datetime

# Configuration globale
class Config:
    """Configuration centralis√©e pour le projet MIA"""
    
    # Param√®tres des donn√©es
    IMG_SIZE = (128, 128)
    BATCH_SIZE = 32
    NUM_CLASSES = 50  # Nombre d'identit√©s √† classifier
    
    # Param√®tres d'entra√Ænement
    EPOCHS = 50
    LEARNING_RATE = 0.001
    
    # Param√®tres du Face Blending
    BLEND_RATIOS = [0.3, 0.5, 0.7]  # Diff√©rents ratios de m√©lange
    AUGMENTATION_FACTOR = 3  # Combien d'images blend√©es par identit√©
    
    # Param√®tres MIA
    MIA_EPOCHS = 30
    MIA_BATCH_SIZE = 64
    SHADOW_MODELS = 5  # Nombre de mod√®les shadow pour MIA
    
    # Chemins
    BASE_DIR = Path("./mia_project")
    DATA_DIR = BASE_DIR / "data"
    MODELS_DIR = BASE_DIR / "models"
    RESULTS_DIR = BASE_DIR / "results"
    
    def __init__(self):
        # Cr√©er les dossiers
        for directory in [self.BASE_DIR, self.DATA_DIR, self.MODELS_DIR, self.RESULTS_DIR]:
            directory.mkdir(exist_ok=True, parents=True)

config = Config()
print("üìÅ Structure du projet cr√©√©e")
print(f"   - Dossier principal: {config.BASE_DIR}")
print(f"   - {config.NUM_CLASSES} identit√©s √† classifier")
print(f"   - Taille d'images: {config.IMG_SIZE}")

üìÅ Structure du projet cr√©√©e
   - Dossier principal: mia_project
   - 50 identit√©s √† classifier
   - Taille d'images: (128, 128)


In [6]:
# ==================================================================================
# PROJET: Face Blending et Attaques par Inf√©rence d'Appartenance (MIA)
# Bas√© sur: Ghorbel et al. (2024) & Shokri et al. (2017)
# ==================================================================================

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, callbacks
from tensorflow.keras.applications import ResNet50, InceptionV3, MobileNetV2
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import pandas as pd
from pathlib import Path
import cv2
import os
from tqdm import tqdm
import json
import pickle
from datetime import datetime

# Configuration globale
class Config:
    """Configuration centralis√©e pour le projet MIA"""
    
    # Param√®tres des donn√©es
    IMG_SIZE = (128, 128)
    BATCH_SIZE = 32
    NUM_CLASSES = 50  # Nombre d'identit√©s √† classifier
    
    # Param√®tres d'entra√Ænement
    EPOCHS = 50
    LEARNING_RATE = 0.001
    
    # Param√®tres du Face Blending
    BLEND_RATIOS = [0.3, 0.5, 0.7]  # Diff√©rents ratios de m√©lange
    AUGMENTATION_FACTOR = 3  # Combien d'images blend√©es par identit√©
    
    # Param√®tres MIA
    MIA_EPOCHS = 30
    MIA_BATCH_SIZE = 64
    SHADOW_MODELS = 5  # Nombre de mod√®les shadow pour MIA
    
    # Chemins
    BASE_DIR = Path("./mia_project")
    DATA_DIR = BASE_DIR / "data"
    MODELS_DIR = BASE_DIR / "models"
    RESULTS_DIR = BASE_DIR / "results"
    
    def __init__(self):
        # Cr√©er les dossiers
        for directory in [self.BASE_DIR, self.DATA_DIR, self.MODELS_DIR, self.RESULTS_DIR]:
            directory.mkdir(exist_ok=True, parents=True)

config = Config()
print("üìÅ Structure du projet cr√©√©e")
print(f"   - Dossier principal: {config.BASE_DIR}")
print(f"   - {config.NUM_CLASSES} identit√©s √† classifier")
print(f"   - Taille d'images: {config.IMG_SIZE}")

üìÅ Structure du projet cr√©√©e
   - Dossier principal: mia_project
   - 50 identit√©s √† classifier
   - Taille d'images: (128, 128)


In [7]:
class FaceBlendingDataGenerator:
    """
    G√©n√©rateur de donn√©es avec Face Blending pour am√©liorer la confidentialit√©
    Bas√© sur Ghorbel et al. (2024)
    """
    
    def __init__(self, config):
        self.config = config
        self.original_data = None
        self.blended_data = None
        self.labels = None
        
    def load_lfw_subset(self):
        """Charge un sous-ensemble du dataset LFW avec des identit√©s s√©lectionn√©es"""
        print("\nüì• Chargement du dataset LFW...")
        
        from sklearn.datasets import fetch_lfw_people
        
        # Charger avec plus d'images par personne pour avoir assez de donn√©es
        lfw_people = fetch_lfw_people(
            min_faces_per_person=20,  # Au moins 20 images par personne
            resize=0.5,
            color=False
        )
        
        # S√©lectionner les N premi√®res identit√©s
        unique_labels = np.unique(lfw_people.target)[:self.config.NUM_CLASSES]
        
        # Filtrer les donn√©es
        mask = np.isin(lfw_people.target, unique_labels)
        images = lfw_people.images[mask]
        labels = lfw_people.target[mask]
        
        # Remapper les labels de 0 √† N-1
        label_mapping = {old_label: new_label for new_label, old_label in enumerate(unique_labels)}
        labels = np.array([label_mapping[label] for label in labels])
        
        # Redimensionner et normaliser
        processed_images = []
        for img in tqdm(images, desc="Preprocessing"):
            # Redimensionner
            img_resized = cv2.resize(img, self.config.IMG_SIZE)
            
            # Convertir en couleur
            if len(img_resized.shape) == 2:
                img_resized = cv2.cvtColor(img_resized, cv2.COLOR_GRAY2RGB)
            
            # Normaliser
            img_normalized = img_resized.astype(np.float32) / 255.0
            processed_images.append(img_normalized)
        
        self.original_data = np.array(processed_images)
        self.labels = labels
        
        print(f"‚úÖ Dataset charg√©:")
        print(f"   - {len(self.original_data)} images")
        print(f"   - {self.config.NUM_CLASSES} identit√©s")
        print(f"   - Forme: {self.original_data.shape}")
        
        return self.original_data, self.labels
    
    def create_blended_faces(self, alpha_values=None):
        """
        Cr√©e des visages m√©lang√©s (Face Blending) inter-classe
        """
        if alpha_values is None:
            alpha_values = self.config.BLEND_RATIOS
            
        print(f"\nüé≠ G√©n√©ration de Face Blending...")
        print(f"   - Ratios de m√©lange: {alpha_values}")
        
        blended_images = []
        blended_labels = []
        blend_metadata = []
        
        for class_id in tqdm(range(self.config.NUM_CLASSES), desc="Blending identit√©s"):
            # Images de cette classe
            class_mask = self.labels == class_id
            class_images = self.original_data[class_mask]
            
            # Pour chaque ratio de m√©lange
            for alpha in alpha_values:
                # Pour chaque facteur d'augmentation
                for aug_idx in range(self.config.AUGMENTATION_FACTOR):
                    # S√©lectionner deux images al√©atoirement
                    if len(class_images) >= 2:
                        idx1, idx2 = np.random.choice(len(class_images), 2, replace=False)
                        img1 = class_images[idx1]
                        img2 = class_images[idx2]
                        
                        # Face blending simple (m√©lange lin√©aire)
                        blended_img = alpha * img1 + (1 - alpha) * img2
                        
                        # Ajouter du bruit l√©ger pour plus de r√©alisme
                        noise = np.random.normal(0, 0.01, blended_img.shape)
                        blended_img = np.clip(blended_img + noise, 0, 1)
                        
                        blended_images.append(blended_img)
                        blended_labels.append(class_id)  # Garde le label de la classe
                        
                        blend_metadata.append({
                            'class_id': class_id,
                            'alpha': alpha,
                            'aug_idx': aug_idx,
                            'source_indices': [idx1, idx2]
                        })
        
        self.blended_data = np.array(blended_images)
        self.blend_metadata = blend_metadata
        
        print(f"‚úÖ Face Blending termin√©:")
        print(f"   - {len(blended_images)} images blend√©es g√©n√©r√©es")
        print(f"   - Facteur d'augmentation: {len(blended_images) / len(self.original_data):.1f}x")
        
        return self.blended_data, np.array(blended_labels)
    
    def prepare_training_data(self, include_blended=True, test_size=0.2):
        """
        Pr√©pare les donn√©es d'entra√Ænement en combinant originales et blend√©es
        """
        print(f"\nüìä Pr√©paration des donn√©es d'entra√Ænement...")
        
        if include_blended and self.blended_data is not None:
            # Combiner donn√©es originales et blend√©es
            all_images = np.vstack([self.original_data, self.blended_data])
            original_labels = self.labels
            blended_labels = np.array([meta['class_id'] for meta in self.blend_metadata])
            all_labels = np.hstack([original_labels, blended_labels])
            
            # Marquer quelles images sont blend√©es (pour MIA)
            is_blended = np.hstack([
                np.zeros(len(self.original_data), dtype=bool),  # Originales = False
                np.ones(len(self.blended_data), dtype=bool)     # Blend√©es = True
            ])
            
        else:
            all_images = self.original_data
            all_labels = self.labels
            is_blended = np.zeros(len(self.original_data), dtype=bool)
        
        # Division train/test stratifi√©e
        X_train, X_test, y_train, y_test, blend_train, blend_test = train_test_split(
            all_images, all_labels, is_blended,
            test_size=test_size,
            stratify=all_labels,
            random_state=42
        )
        
        print(f"‚úÖ Donn√©es pr√©par√©es:")
        print(f"   - Train: {len(X_train)} images ({np.sum(blend_train)} blend√©es)")
        print(f"   - Test: {len(X_test)} images ({np.sum(blend_test)} blend√©es)")
        print(f"   - Classes: {len(np.unique(all_labels))}")
        
        return {
            'X_train': X_train, 'X_test': X_test,
            'y_train': y_train, 'y_test': y_test,
            'is_blended_train': blend_train, 'is_blended_test': blend_test
        }

# Initialisation du g√©n√©rateur de donn√©es
data_generator = FaceBlendingDataGenerator(config)
original_data, labels = data_generator.load_lfw_subset()


üì• Chargement du dataset LFW...


Preprocessing: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2527/2527 [00:00<00:00, 5256.18it/s]


MemoryError: Unable to allocate 474. MiB for an array with shape (2527, 128, 128, 3) and data type float32

In [None]:
class AdvancedCNNClassifier:
    """
    Mod√®le CNN avanc√© pour classification d'identit√©s avec transfer learning
    """
    
    def __init__(self, config, architecture='resnet50'):
        self.config = config
        self.architecture = architecture
        self.model = None
        self.history = None
        
    def build_model(self):
        """Construit le mod√®le avec transfer learning"""
        print(f"\nüèóÔ∏è  Construction du mod√®le {self.architecture.upper()}...")
        
        # Mod√®le de base pr√©-entra√Æn√©
        if self.architecture == 'resnet50':
            base_model = ResNet50(
                weights='imagenet',
                include_top=False,
                input_shape=(*self.config.IMG_SIZE, 3)
            )
        elif self.architecture == 'inception':
            base_model = InceptionV3(
                weights='imagenet',
                include_top=False,
                input_shape=(*self.config.IMG_SIZE, 3)
            )
        elif self.architecture == 'mobilenet':
            base_model = MobileNetV2(
                weights='imagenet',
                include_top=False,
                input_shape=(*self.config.IMG_SIZE, 3)
            )
        
        # Geler les couches de base initialement
        base_model.trainable = False
        
        # Ajouter les couches de classification
        self.model = models.Sequential([
            base_model,
            layers.GlobalAveragePooling2D(),
            layers.BatchNormalization(),
            layers.Dropout(0.3),
            layers.Dense(512, activation='relu'),
            layers.BatchNormalization(),
            layers.Dropout(0.5),
            layers.Dense(256, activation='relu'),
            layers.Dropout(0.3),
            layers.Dense(self.config.NUM_CLASSES, activation='softmax')
        ])
        
        # Compilation
        self.model.compile(
            optimizer=optimizers.Adam(learning_rate=self.config.LEARNING_RATE),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy', 'top_3_accuracy']
        )
        
        print(f"‚úÖ Mod√®le construit:")
        print(f"   - Architecture: {self.architecture}")
        print(f"   - Param√®tres total: {self.model.count_params():,}")
        
        return self.model
    
    def train(self, X_train, y_train, X_val, y_val, fine_tune=True):
        """Entra√Æne le mod√®le avec fine-tuning optionnel"""
        print(f"\nüöÄ D√©but de l'entra√Ænement...")
        
        # Callbacks
        callbacks_list = [
            callbacks.EarlyStopping(
                monitor='val_accuracy',
                patience=10,
                restore_best_weights=True
            ),
            callbacks.ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.5,
                patience=5,
                min_lr=1e-7
            ),
            callbacks.ModelCheckpoint(
                self.config.MODELS_DIR / f"{self.architecture}_best.h5",
                monitor='val_accuracy',
                save_best_only=True
            )
        ]
        
        # Phase 1: Entra√Ænement avec base gel√©e
        print("üìö Phase 1: Entra√Ænement des couches de classification...")
        history1 = self.model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=self.config.EPOCHS // 2,
            batch_size=self.config.BATCH_SIZE,
            callbacks=callbacks_list,
            verbose=1
        )
        
        # Phase 2: Fine-tuning si demand√©
        if fine_tune:
            print("üîß Phase 2: Fine-tuning...")
            
            # D√©geler les derni√®res couches du mod√®le de base
            base_model = self.model.layers[0]
            base_model.trainable = True
            
            # Geler les premi√®res couches (garder les features de bas niveau)
            for layer in base_model.layers[:-50]:  # Ajuster selon l'architecture
                layer.trainable = False
            
            # R√©compiler avec un learning rate plus bas
            self.model.compile(
                optimizer=optimizers.Adam(learning_rate=self.config.LEARNING_RATE/10),
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy', 'top_3_accuracy']
            )
            
            # Continuer l'entra√Ænement
            history2 = self.model.fit(
                X_train, y_train,
                validation_data=(X_val, y_val),
                epochs=self.config.EPOCHS // 2,
                batch_size=self.config.BATCH_SIZE,
                callbacks=callbacks_list,
                verbose=1
            )
            
            # Combiner les historiques
            for key in history1.history:
                history1.history[key].extend(history2.history[key])
        
        self.history = history1
        print("‚úÖ Entra√Ænement termin√©!")
        
        return self.history
    
    def evaluate(self, X_test, y_test):
        """√âvalue le mod√®le et retourne les m√©triques d√©taill√©es"""
        print("\nüìä √âvaluation du mod√®le...")
        
        # Pr√©dictions
        y_pred_probs = self.model.predict(X_test, batch_size=self.config.BATCH_SIZE)
        y_pred = np.argmax(y_pred_probs, axis=1)
        
        # M√©triques de base
        accuracy = np.mean(y_pred == y_test)
        
        # Rapport de classification
        report = classification_report(y_test, y_pred, output_dict=True)
        
        # Matrice de confusion
        conf_matrix = confusion_matrix(y_test, y_pred)
        
        print(f"‚úÖ R√©sultats d'√©valuation:")
        print(f"   - Accuracy: {accuracy:.4f}")
        print(f"   - Macro F1: {report['macro avg']['f1-score']:.4f}")
        print(f"   - Weighted F1: {report['weighted avg']['f1-score']:.4f}")
        
        return {
            'accuracy': accuracy,
            'predictions': y_pred,
            'probabilities': y_pred_probs,
            'classification_report': report,
            'confusion_matrix': conf_matrix
        }

# Cr√©ation du mod√®le
classifier = AdvancedCNNClassifier(config, architecture='resnet50')
model = classifier.build_model()


üèóÔ∏è  Construction du mod√®le RESNET50...


In [3]:
import numpy as np
import cv2
import gc
from tqdm import tqdm
from sklearn.model_selection import train_test_split

class FaceBlendingDataGenerator:
    def __init__(self, config):
        self.config = config
        self.original_data = None
        self.blended_data = None
        self.labels = None

    def load_lfw_subset(self):
        from sklearn.datasets import fetch_lfw_people
        print("\nüì• Chargement du dataset LFW (Optimis√© RAM)...")
        
        lfw_people = fetch_lfw_people(min_faces_per_person=20, resize=0.5, color=False)
        unique_labels = np.unique(lfw_people.target)[:self.config.NUM_CLASSES]
        mask = np.isin(lfw_people.target, unique_labels)
        images = lfw_people.images[mask]
        raw_labels = lfw_people.target[mask]
        
        label_mapping = {old: new for new, old in enumerate(unique_labels)}
        self.labels = np.array([label_mapping[l] for l in raw_labels])
        
        n_imgs = len(images)
        self.original_data = np.zeros((n_imgs, *self.config.IMG_SIZE, 3), dtype=np.float32)
        
        for i, img in enumerate(tqdm(images, desc="Preprocessing")):
            img_res = cv2.resize(img, self.config.IMG_SIZE)
            if len(img_res.shape) == 2:
                img_res = cv2.cvtColor(img_res, cv2.COLOR_GRAY2RGB)
            self.original_data[i] = img_res.astype(np.float32) / 255.0
            
        del images
        gc.collect()
        return self.original_data, self.labels

    def create_blended_faces(self):
        # CORRECTION ICI : Ajout du pr√©fixe r pour √©viter SyntaxWarning
        r"""M√©lange lin√©aire : $$I_{blend} = \alpha \cdot I_1 + (1 - \alpha) \cdot I_2$$"""
        alpha_values = self.config.BLEND_RATIOS
        print(f"\nüé≠ G√©n√©ration de Face Blending...")
        
        blended_list = []
        blended_labels = []
        
        for class_id in tqdm(range(self.config.NUM_CLASSES), desc="Blending"):
            class_mask = self.labels == class_id
            class_images = self.original_data[class_mask]
            
            if len(class_images) < 2: continue

            for alpha in alpha_values:
                for _ in range(self.config.AUGMENTATION_FACTOR):
                    idx1, idx2 = np.random.choice(len(class_images), 2, replace=False)
                    blend = alpha * class_images[idx1] + (1 - alpha) * class_images[idx2]
                    blend = np.clip(blend + np.random.normal(0, 0.01, blend.shape), 0, 1)
                    blended_list.append(blend)
                    blended_labels.append(class_id)

        self.blended_data = np.array(blended_list, dtype=np.float32)
        self.blended_labels = np.array(blended_labels)
        print(f"‚úÖ Blending termin√© : {len(self.blended_data)} images g√©n√©r√©es.")
        return self.blended_data, self.blended_labels

    def prepare_training_data(self):
        X = np.concatenate([self.original_data, self.blended_data], axis=0)
        y = np.concatenate([self.labels, self.blended_labels], axis=0)
        return train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

In [6]:
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras import layers, models, optimizers, callbacks

class AdvancedCNNClassifier:
    def __init__(self, config, architecture='mobilenet'):
        self.config = config
        self.architecture = architecture
        # Nettoyage pr√©ventif
        tf.keras.backend.clear_session()
        gc.collect()

    def build_model(self):
        print(f"\nüèóÔ∏è Construction du mod√®le {self.architecture.upper()}...")
        
        # Utilisation de MobileNetV2 (Beaucoup plus l√©ger que ResNet)
        base_model = MobileNetV2(
            weights='imagenet',
            include_top=False,
            input_shape=(*self.config.IMG_SIZE, 3)
        )
        base_model.trainable = False

        self.model = models.Sequential([
            base_model,
            layers.GlobalAveragePooling2D(),
            layers.BatchNormalization(),
            layers.Dropout(0.3),
            layers.Dense(256, activation='relu'), # R√©duit de 512 √† 256 pour gagner de la RAM
            layers.Dense(self.config.NUM_CLASSES, activation='softmax')
        ])

        self.model.compile(
            optimizer=optimizers.Adam(learning_rate=self.config.LEARNING_RATE),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )
        
        print(f"‚úÖ Mod√®le pr√™t. Param√®tres : {self.model.count_params():,}")
        return self.model

    def train(self, X_train, y_train, X_val, y_val):
        # Utilisation de batch_size plus petit si √ßa plante encore
        return self.model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=self.config.EPOCHS,
            batch_size=self.config.BATCH_SIZE, # Essaye 16 si 32 √©choue
            callbacks=[
                callbacks.EarlyStopping(patience=5, restore_best_weights=True),
                callbacks.ReduceLROnPlateau(factor=0.5, patience=3)
            ]
        )

ImportError: DLL load failed while importing _pywrap_dtensor_device: Le module sp√©cifi√© est introuvable.

In [11]:
# ==================================================================================
# SOLUTION COMPL√àTE ET OPTIMIS√âE POUR LE PROJET MIA
# ==================================================================================

import numpy as np
import cv2
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import pandas as pd
import seaborn as sns
import pickle
import gc
from pathlib import Path
from tqdm import tqdm
import time
import warnings
warnings.filterwarnings('ignore')

# Configuration optimis√©e
class OptimizedConfig:
    # Param√®tres adapt√©s √† votre hardware
    IMG_SIZE = (64, 64)     # Plus petit = moins de RAM
    NUM_CLASSES = 15        # Augment√© mais raisonnable
    MIN_SAMPLES_PER_CLASS = 10  # Minimum d'√©chantillons par classe
    
    # Face Blending
    BLEND_RATIOS = [0.3, 0.7]  # Deux ratios pour plus de diversit√©
    AUGMENTATION_FACTOR = 3    # Plus d'augmentation
    
    # ML Parameters
    PCA_COMPONENTS = 100       # Plus de composantes PCA
    TEST_SIZE = 0.25          # 75% train, 25% test
    
    # Chemins
    BASE_DIR = Path("./mia_project_fixed")
    RESULTS_DIR = BASE_DIR / "results"
    
    def __init__(self):
        self.BASE_DIR.mkdir(exist_ok=True)
        self.RESULTS_DIR.mkdir(exist_ok=True)

config = OptimizedConfig()
print(f"üìù Configuration mise √† jour:")
print(f"   - Taille d'image: {config.IMG_SIZE}")
print(f"   - Nombre de classes: {config.NUM_CLASSES}")
print(f"   - Ratios de blend: {config.BLEND_RATIOS}")

üìù Configuration mise √† jour:
   - Taille d'image: (64, 64)
   - Nombre de classes: 15
   - Ratios de blend: [0.3, 0.7]


In [13]:
class FixedDataLoader:
    """Chargeur de donn√©es corrig√© avec crit√®res plus appropri√©s"""
    
    def __init__(self, config):
        self.config = config
        self.class_info = {}
        
    def load_balanced_lfw(self):
        """Charge un dataset √©quilibr√© avec validation appropri√©e"""
        print("\nüì• Chargement √©quilibr√© du dataset LFW (Version Corrig√©e)...")
        
        # Charger avec param√®tres optimis√©s
        lfw_people = fetch_lfw_people(
            min_faces_per_person=self.config.MIN_SAMPLES_PER_CLASS,
            resize=0.4,
            color=False
        )
        
        # Analyser la distribution des classes
        unique_labels, counts = np.unique(lfw_people.target, return_counts=True)
        print(f"   Distribution des classes disponibles:")
        for i, (label, count) in enumerate(zip(unique_labels[:10], counts[:10])):
            print(f"      Classe {label}: {count} images")
        
        # S√©lectionner les classes avec le plus d'√©chantillons
        top_classes = unique_labels[np.argsort(counts)[-self.config.NUM_CLASSES:]]
        mask = np.isin(lfw_people.target, top_classes)
        
        selected_images = lfw_people.images[mask]
        selected_labels = lfw_people.target[mask]
        
        # Remapper les labels
        label_mapping = {old: new for new, old in enumerate(top_classes)}
        mapped_labels = np.array([label_mapping[label] for label in selected_labels])
        
        # V√©rifier l'√©quilibre des classes
        unique_mapped, mapped_counts = np.unique(mapped_labels, return_counts=True)
        print(f"\n‚úÖ Classes s√©lectionn√©es:")
        for label, count in zip(unique_mapped, mapped_counts):
            print(f"   Classe {label}: {count} images")
            self.class_info[label] = count
        
        # Preprocessing avec validation CORRIG√âE
        processed_images = []
        valid_labels = []
        rejected_stats = {"too_uniform": 0, "processing_error": 0, "valid": 0}
        
        print("\nüîß Preprocessing avec validation corrig√©e...")
        for i, (img, label) in enumerate(tqdm(zip(selected_images, mapped_labels), 
                                            total=len(selected_images), desc="Processing")):
            try:
                # Redimensionner
                img_resized = cv2.resize(img, self.config.IMG_SIZE)
                
                # CORRECTION : Crit√®re de qualit√© adapt√© aux images en niveaux de gris (0-255)
                img_std = img_resized.std()
                if img_std < 2:  # Beaucoup plus permissif (√©tait 10, maintenant 2)
                    rejected_stats["too_uniform"] += 1
                    continue
                    
                # Convertir en couleur
                if len(img_resized.shape) == 2:
                    img_resized = cv2.cvtColor(img_resized, cv2.COLOR_GRAY2RGB)
                
                # Normaliser correctement
                img_normalized = img_resized.astype(np.uint8)  # Garder en uint8 directement
                
                processed_images.append(img_normalized)
                valid_labels.append(label)
                rejected_stats["valid"] += 1
                
            except Exception as e:
                rejected_stats["processing_error"] += 1
                continue
        
        final_images = np.array(processed_images, dtype=np.uint8)
        final_labels = np.array(valid_labels)
        
        print(f"\n‚úÖ Dataset preprocessed:")
        print(f"   - Images finales: {len(final_images)}")
        print(f"   - Images rejet√©es (uniformes): {rejected_stats['too_uniform']}")
        print(f"   - Images rejet√©es (erreurs): {rejected_stats['processing_error']}")
        print(f"   - Images valides: {rejected_stats['valid']}")
        print(f"   - Classes finales: {len(np.unique(final_labels))}")
        print(f"   - Forme: {final_images.shape}")
        print(f"   - M√©moire: ~{final_images.nbytes / 1024**2:.1f} MB")
        
        return final_images, final_labels
    
    def create_safe_blending(self, images, labels):
        """Cr√©e des m√©langes avec protection contre les erreurs"""
        print("\nüé≠ Cr√©ation de Face Blending s√©curis√©...")
        
        if len(images) == 0:
            print("‚ùå Aucune image disponible pour le blending!")
            return np.array([]), np.array([]), []
        
        blended_images = []
        blended_labels = []
        blend_metadata = []
        
        # Statistiques pour suivre la g√©n√©ration
        stats = {"total_attempts": 0, "successful_blends": 0, "errors": 0}
        
        for class_id in tqdm(range(self.config.NUM_CLASSES), desc="Safe Blending"):
            class_mask = labels == class_id
            class_images = images[class_mask]
            
            if len(class_images) < 2:
                print(f"   ‚ö†Ô∏è  Classe {class_id}: seulement {len(class_images)} image(s), ignor√©e")
                continue
            
            # G√©n√©rer des m√©langes
            for alpha in self.config.BLEND_RATIOS:
                for aug_idx in range(self.config.AUGMENTATION_FACTOR):
                    stats["total_attempts"] += 1
                    
                    try:
                        # S√©lectionner deux images diff√©rentes
                        idx1, idx2 = np.random.choice(len(class_images), 2, replace=False)
                        
                        img1 = class_images[idx1].astype(np.float32) / 255.0
                        img2 = class_images[idx2].astype(np.float32) / 255.0
                        
                        # M√©lange avec variation l√©g√®re de l'alpha
                        alpha_varied = alpha + np.random.normal(0, 0.05)
                        alpha_varied = np.clip(alpha_varied, 0.2, 0.8)
                        
                        blended = alpha_varied * img1 + (1 - alpha_varied) * img2
                        
                        # Ajouter du bruit l√©ger
                        noise = np.random.normal(0, 0.01, blended.shape)
                        blended = np.clip(blended + noise, 0, 1)
                        
                        # Appliquer une l√©g√®re variation de luminosit√©
                        brightness_factor = np.random.uniform(0.95, 1.05)
                        blended = np.clip(blended * brightness_factor, 0, 1)
                        
                        # Reconvertir en uint8
                        blended_uint8 = (blended * 255).astype(np.uint8)
                        
                        # V√©rification de qualit√© moins stricte
                        if blended_uint8.std() > 1:  # Tr√®s permissif
                            blended_images.append(blended_uint8)
                            blended_labels.append(class_id)
                            
                            blend_metadata.append({
                                'class_id': class_id,
                                'alpha': alpha_varied,
                                'source_indices': [idx1, idx2],
                                'aug_idx': aug_idx
                            })
                            
                            stats["successful_blends"] += 1
                        else:
                            stats["errors"] += 1
                            
                    except Exception as e:
                        stats["errors"] += 1
                        continue
        
        # Protection contre division par z√©ro
        success_rate = (stats["successful_blends"] / stats["total_attempts"]) if stats["total_attempts"] > 0 else 0
        
        print(f"‚úÖ Blending termin√©:")
        print(f"   - Tentatives: {stats['total_attempts']}")
        print(f"   - R√©ussites: {stats['successful_blends']}")
        print(f"   - Erreurs: {stats['errors']}")
        print(f"   - Taux de succ√®s: {success_rate:.2%}")
        
        if len(blended_images) == 0:
            print("‚ö†Ô∏è  Aucun m√©lange g√©n√©r√© avec succ√®s!")
            # Cr√©er des donn√©es factices pour √©viter l'erreur
            dummy_img = np.random.randint(0, 255, (*self.config.IMG_SIZE, 3), dtype=np.uint8)
            return np.array([dummy_img]), np.array([0]), [{'class_id': 0, 'alpha': 0.5}]
        
        return np.array(blended_images, dtype=np.uint8), np.array(blended_labels), blend_metadata

# Pipeline corrig√©
print("üöÄ D√©but du chargement corrig√©...")
fixed_loader = FixedDataLoader(config)
images, labels = fixed_loader.load_balanced_lfw()
blended_images, blended_labels, blend_metadata = fixed_loader.create_safe_blending(images, labels)

# V√©rifier que nous avons des donn√©es
print(f"\nüìä V√©rification des donn√©es:")
print(f"   - Images originales: {len(images)}")
print(f"   - Images blend√©es: {len(blended_images)}")
print(f"   - Labels originaux: {len(labels)}")
print(f"   - Labels blend√©s: {len(blended_labels)}")

if len(images) > 0 and len(blended_images) > 0:
    print("‚úÖ Donn√©es pr√™tes pour l'entra√Ænement!")
    
    # Continuer avec le pipeline ML
    pipeline = ImprovedMLPipeline(config)
    data = pipeline.prepare_robust_data(images, labels, blended_images, blended_labels)
    
    X_train_processed, X_test_processed = pipeline.apply_robust_pca(
        data['X_train'], data['X_test']
    )
    
    models = pipeline.train_multiple_models(X_train_processed, data['y_train'])
    results = pipeline.comprehensive_evaluation(X_test_processed, data['y_test'])
    
    print(f"\nüéä Pipeline termin√©!")
    if results:
        best_acc = max(result['accuracy'] for result in results.values())
        print(f"üèÜ Meilleure accuracy: {best_acc:.2%}")
else:
    print("‚ùå Pas assez de donn√©es pour l'entra√Ænement")

üöÄ D√©but du chargement corrig√©...

üì• Chargement √©quilibr√© du dataset LFW (Version Corrig√©e)...
   Distribution des classes disponibles:
      Classe 0: 19 images
      Classe 1: 12 images
      Classe 2: 39 images
      Classe 3: 35 images
      Classe 4: 21 images
      Classe 5: 36 images
      Classe 6: 15 images
      Classe 7: 20 images
      Classe 8: 11 images
      Classe 9: 12 images

‚úÖ Classes s√©lectionn√©es:
   Classe 0: 44 images
   Classe 1: 48 images
   Classe 2: 49 images
   Classe 3: 52 images
   Classe 4: 52 images
   Classe 5: 53 images
   Classe 6: 55 images
   Classe 7: 60 images
   Classe 8: 71 images
   Classe 9: 77 images
   Classe 10: 109 images
   Classe 11: 121 images
   Classe 12: 144 images
   Classe 13: 236 images
   Classe 14: 530 images

üîß Preprocessing avec validation corrig√©e...


Processing: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1701/1701 [00:00<00:00, 17893.67it/s]


‚úÖ Dataset preprocessed:
   - Images finales: 0
   - Images rejet√©es (uniformes): 1701
   - Images rejet√©es (erreurs): 0
   - Images valides: 0
   - Classes finales: 0
   - Forme: (0,)
   - M√©moire: ~0.0 MB

üé≠ Cr√©ation de Face Blending s√©curis√©...
‚ùå Aucune image disponible pour le blending!

üìä V√©rification des donn√©es:
   - Images originales: 0
   - Images blend√©es: 0
   - Labels originaux: 0
   - Labels blend√©s: 0
‚ùå Pas assez de donn√©es pour l'entra√Ænement





In [None]:
class FinalFixedDataLoader:
    """Version finale corrig√©e - sans filtrage trop restrictif"""
    
    def __init__(self, config):
        self.config = config
        self.class_info = {}
        
    def load_robust_lfw(self):
        """Charge le dataset sans filtrage excessif"""
        print("\nüì• Chargement LFW (Version Finale Corrig√©e)...")
        
        # Charger avec param√®tres optimis√©s
        lfw_people = fetch_lfw_people(
            min_faces_per_person=self.config.MIN_SAMPLES_PER_CLASS,
            resize=0.4,
            color=False
        )
        
        # Analyser la distribution des classes
        unique_labels, counts = np.unique(lfw_people.target, return_counts=True)
        print(f"   Distribution des classes disponibles:")
        for i, (label, count) in enumerate(zip(unique_labels[:10], counts[:10])):
            print(f"      Classe {label}: {count} images")
        
        # S√©lectionner les classes avec le plus d'√©chantillons
        top_classes = unique_labels[np.argsort(counts)[-self.config.NUM_CLASSES:]]
        mask = np.isin(lfw_people.target, top_classes)
        
        selected_images = lfw_people.images[mask]
        selected_labels = lfw_people.target[mask]
        
        # Remapper les labels
        label_mapping = {old: new for new, old in enumerate(top_classes)}
        mapped_labels = np.array([label_mapping[label] for label in selected_labels])
        
        print(f"\n‚úÖ Classes s√©lectionn√©es:")
        unique_mapped, mapped_counts = np.unique(mapped_labels, return_counts=True)
        for label, count in zip(unique_mapped, mapped_counts):
            print(f"   Classe {label}: {count} images")
            self.class_info[label] = count
        
        # Preprocessing SIMPLIFI√â (suppression du filtrage probl√©matique)
        processed_images = []
        valid_labels = []
        
        print("\nüîß Preprocessing simplifi√©...")
        
        # V√©rifier d'abord quelques images pour comprendre la distribution
        sample_stds = []
        for i in range(min(10, len(selected_images))):
            img = selected_images[i]
            img_resized = cv2.resize(img, self.config.IMG_SIZE)
            sample_stds.append(img_resized.std())
        
        print(f"   √âcart-types √©chantillon: {sample_stds}")
        print(f"   Min std: {min(sample_stds):.2f}, Max std: {max(sample_stds):.2f}, Moyenne: {np.mean(sample_stds):.2f}")
        
        # Processus sans filtrage excessif
        rejected_stats = {"processing_error": 0, "valid": 0}
        
        for i, (img, label) in enumerate(tqdm(zip(selected_images, mapped_labels), 
                                            total=len(selected_images), desc="Processing")):
            try:
                # Redimensionner
                img_resized = cv2.resize(img, self.config.IMG_SIZE)
                
                # PAS DE FILTRAGE DE QUALIT√â - accepter toutes les images
                # (Le filtrage √©tait trop restrictif pour les images LFW)
                
                # Convertir en couleur
                if len(img_resized.shape) == 2:
                    img_resized = cv2.cvtColor(img_resized, cv2.COLOR_GRAY2RGB)
                
                # Normaliser correctement
                img_normalized = img_resized.astype(np.uint8)
                
                processed_images.append(img_normalized)
                valid_labels.append(label)
                rejected_stats["valid"] += 1
                
            except Exception as e:
                rejected_stats["processing_error"] += 1
                continue
        
        final_images = np.array(processed_images, dtype=np.uint8)
        final_labels = np.array(valid_labels)
        
        print(f"\n‚úÖ Dataset preprocessed:")
        print(f"   - Images finales: {len(final_images)}")
        print(f"   - Images avec erreurs: {rejected_stats['processing_error']}")
        print(f"   - Images valides: {rejected_stats['valid']}")
        print(f"   - Classes finales: {len(np.unique(final_labels))}")
        print(f"   - Forme: {final_images.shape}")
        print(f"   - M√©moire: ~{final_images.nbytes / 1024**2:.1f} MB")
        
        return final_images, final_labels
    
    def create_guaranteed_blending(self, images, labels):
        """Cr√©e des m√©langes garantis de fonctionner"""
        print("\nüé≠ Cr√©ation de Face Blending garantie...")
        
        if len(images) == 0:
            print("‚ùå Aucune image disponible pour le blending!")
            return np.array([]), np.array([]), []
        
        blended_images = []
        blended_labels = []
        blend_metadata = []
        
        # Statistiques pour suivre la g√©n√©ration
        stats = {"total_attempts": 0, "successful_blends": 0}
        
        print(f"   Classes disponibles pour blending:")
        for class_id in range(self.config.NUM_CLASSES):
            class_mask = labels == class_id
            class_images = images[class_mask]
            print(f"      Classe {class_id}: {len(class_images)} images")
        
        for class_id in tqdm(range(self.config.NUM_CLASSES), desc="Guaranteed Blending"):
            class_mask = labels == class_id
            class_images = images[class_mask]
            
            if len(class_images) < 2:
                print(f"   ‚ö†Ô∏è  Classe {class_id}: seulement {len(class_images)} image(s), ignor√©e")
                continue
            
            # G√©n√©rer des m√©langes SANS crit√®res restrictifs
            for alpha in self.config.BLEND_RATIOS:
                for aug_idx in range(self.config.AUGMENTATION_FACTOR):
                    stats["total_attempts"] += 1
                    
                    try:
                        # S√©lectionner deux images diff√©rentes
                        idx1, idx2 = np.random.choice(len(class_images), 2, replace=False)
                        
                        img1 = class_images[idx1].astype(np.float32) / 255.0
                        img2 = class_images[idx2].astype(np.float32) / 255.0
                        
                        # M√©lange simple et robuste
                        alpha_varied = alpha + np.random.normal(0, 0.05)
                        alpha_varied = np.clip(alpha_varied, 0.2, 0.8)
                        
                        blended = alpha_varied * img1 + (1 - alpha_varied) * img2
                        
                        # Ajouter du bruit tr√®s l√©ger
                        noise = np.random.normal(0, 0.005, blended.shape)
                        blended = np.clip(blended + noise, 0, 1)
                        
                        # Reconvertir en uint8
                        blended_uint8 = (blended * 255).astype(np.uint8)
                        
                        # AUCUN filtrage de qualit√© - accepter tout
                        blended_images.append(blended_uint8)
                        blended_labels.append(class_id)
                        
                        blend_metadata.append({
                            'class_id': class_id,
                            'alpha': alpha_varied,
                            'source_indices': [idx1, idx2],
                            'aug_idx': aug_idx
                        })
                        
                        stats["successful_blends"] += 1
                            
                    except Exception as e:
                        print(f"      Erreur dans classe {class_id}: {e}")
                        continue
        
        success_rate = (stats["successful_blends"] / stats["total_attempts"]) if stats["total_attempts"] > 0 else 0
        
        print(f"‚úÖ Blending termin√©:")
        print(f"   - Tentatives: {stats['total_attempts']}")
        print(f"   - R√©ussites: {stats['successful_blends']}")
        print(f"   - Taux de succ√®s: {success_rate:.2%}")
        
        return np.array(blended_images, dtype=np.uint8), np.array(blended_labels), blend_metadata

# Test avec le nouveau loader
print("üöÄ Test du nouveau chargeur de donn√©es...")
final_loader = FinalFixedDataLoader(config)
images, labels = final_loader.load_robust_lfw()

if len(images) > 0:
    print(f"\nüéâ Succ√®s! {len(images)} images charg√©es")
    
    # Cr√©er les m√©langes
    blended_images, blended_labels, blend_metadata = final_loader.create_guaranteed_blending(images, labels)
    
    # V√©rifier les r√©sultats
    print(f"\nüìä V√©rification finale:")
    print(f"   - Images originales: {len(images)}")
    print(f"   - Images blend√©es: {len(blended_images)}")
    print(f"   - Labels originaux: {len(labels)}")
    print(f"   - Labels blend√©s: {len(blended_labels)}")
    
    if len(images) > 0 and len(blended_images) > 0:
        print("\n‚úÖ DONN√âES PR√äTES - Lancement du pipeline ML!")
        
        # Lancer le pipeline ML
        pipeline = ImprovedMLPipeline(config)
        data = pipeline.prepare_robust_data(images, labels, blended_images, blended_labels)
        
        X_train_processed, X_test_processed = pipeline.apply_robust_pca(
            data['X_train'], data['X_test']
        )
        
        models = pipeline.train_multiple_models(X_train_processed, data['y_train'])
        results = pipeline.comprehensive_evaluation(X_test_processed, data['y_test'])
        
        print(f"\nüèÜ R√âSULTATS FINAUX:")
        if results:
            for model_name, result in results.items():
                print(f"   - {model_name}: {result['accuracy']:.2%}")
            
            best_acc = max(result['accuracy'] for result in results.values())
            print(f"\nü•á Meilleure performance: {best_acc:.2%}")
        else:
            print("‚ùå Aucun mod√®le n'a pu √™tre entra√Æn√©")
    else:
        print("‚ùå Probl√®me avec la g√©n√©ration des donn√©es")
else:
    print("‚ùå √âchec du chargement des donn√©es")


üöÄ LANCEMENT DU PIPELINE COMPLET

üìä Pr√©paration robuste des donn√©es...
   Distribution finale des classes:

üìà Statistiques des features:
   - Moyenne: nan
   - Std: nan


ValueError: zero-size array to reduction operation minimum which has no identity

In [10]:
# 1. Ta config
class Config:
    NUM_CLASSES = 50       # On commence petit pour tester
    IMG_SIZE = (128, 128)
    BLEND_RATIOS = [0.5]
    AUGMENTATION_FACTOR = 2
    LEARNING_RATE = 1e-3
    BATCH_SIZE = 16        # Petit batch pour √©viter le OOM
    EPOCHS = 10

config = Config()

# 2. Pipeline
gen = FaceBlendingDataGenerator(config)
gen.load_lfw_subset()
gen.create_blended_faces()
X_train, X_val, y_train, y_val = gen.prepare_training_data()

# 3. Entra√Ænement
clf = AdvancedCNNClassifier(config)
clf.build_model()
clf.train(X_train, y_train, X_val, y_val)


üì• Chargement du dataset LFW (Optimis√© RAM)...


Preprocessing: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2527/2527 [00:00<00:00, 4415.73it/s]



üé≠ G√©n√©ration de Face Blending...


Blending: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:00<00:00, 54.68it/s]


‚úÖ Blending termin√© : 100 images g√©n√©r√©es.


NameError: name 'AdvancedCNNClassifier' is not defined