In [5]:
import numpy as np
import os
import cv2
from tqdm import tqdm
from tensorflow.keras import applications
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.datasets import fetch_lfw_people

# Function to load the LFW dataset
def load_lfw_data():
    print("Loading LFW dataset...")
    lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)
    X = lfw_people.images
    y = lfw_people.target
    target_names = lfw_people.target_names
    return X, y, target_names

# Load dataset
X, y, target_names = load_lfw_data()

# Preprocessing function
def preprocess_face(face_image, model_type="vgg16"):
    face_image = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)  # Convert to RGB
    
    if model_type == "vgg16" or model_type == "resnet50":
        # Resize to input size of VGG16 and ResNet (224x224)
        face_image = cv2.resize(face_image, (224, 224))
    
    # Convert to float32 and normalize to [0, 1]
    face_image = np.expand_dims(face_image, axis=0).astype(np.float32) / 255.0
    return face_image

# Function to load VGG16 and ResNet models
def load_models():
    print("Loading VGG16 and ResNet50 models...")
    # Load the VGG16 model from Keras applications
    vgg_model = applications.VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    # ResNet50 model (pre-trained on ImageNet)
    resnet_model = applications.ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    
    print("Models loaded.")
    return vgg_model, resnet_model

# Function to extract features from each model
def extract_features(model, faces, model_type="vgg16"):
    print(f"Extracting features using {model_type} model...")
    features = []
    for i, face in enumerate(tqdm(faces, desc=f"Extracting {model_type} features", total=len(faces))):
        preprocessed_face = preprocess_face(face, model_type=model_type)
        
        # Extract features using the model
        feature = model.predict(preprocessed_face)
        features.append(feature.flatten())

    print(f"Feature extraction for {model_type} completed.")
    return np.array(features)

# Function to save features locally (to not extract every time)
def save_features(features, model_type):
    file_path = f"{model_type}_features.npy"
    np.save(file_path, features)
    print(f"Features saved to {file_path}")

# Function to load features from disk
def load_features(model_type):
    file_path = f"{model_type}_features.npy"
    if os.path.exists(file_path):
        features = np.load(file_path)
        print(f"Loaded features from {file_path}")
        return features
    else:
        print(f"No cached features found for {model_type}.")
        return None

# Load models
vgg_model, resnet_model = load_models()

# Check if features already exist, and load them if they do
vgg_face_features = load_features("vgg16")
resnet_features = load_features("resnet50")

# If features don't exist, extract and save them
if vgg_face_features is None:
    vgg_face_features = extract_features(vgg_model, X, model_type="vgg16")
    save_features(vgg_face_features, "vgg16")

if resnet_features is None:
    resnet_features = extract_features(resnet_model, X, model_type="resnet50")
    save_features(resnet_features, "resnet50")

# Split the dataset into training and testing sets
print("Splitting the dataset into training and testing sets...")
X_train_vgg, X_test_vgg, y_train_vgg, y_test_vgg = train_test_split(vgg_face_features, y, test_size=0.2, random_state=42)
X_train_resnet, X_test_resnet, y_train_resnet, y_test_resnet = train_test_split(resnet_features, y, test_size=0.2, random_state=42)

# Train a classifier for each model's features
svm_vgg_face = SVC(kernel='linear', probability=True)
svm_resnet = SVC(kernel='linear', probability=True)

# Training progress
def train_svm_with_progress(svm, X_train, y_train, model_name="VGG16"):
    print(f"Training SVM classifier on {model_name} features...")
    # We cannot directly use tqdm with sklearn's SVM, so we display manual progress updates
    n_samples = len(X_train)
    for i in range(0, n_samples, 10):  # Train in chunks to show progress
        end = min(i + 10, n_samples)
        svm.fit(X_train[i:end], y_train[i:end])
        percentage_complete = (end / n_samples) * 100
        print(f"{model_name} training progress: {percentage_complete:.2f}%")
    print(f"SVM classifier trained on {model_name} features.")

# Train SVM on VGG16 features
train_svm_with_progress(svm_vgg_face, X_train_vgg, y_train_vgg, model_name="VGG16")

# Train SVM on ResNet features
train_svm_with_progress(svm_resnet, X_train_resnet, y_train_resnet, model_name="ResNet")

# Predict using SVM classifiers
print("Making predictions on the test set using VGG16 model...")
y_pred_vgg_face = svm_vgg_face.predict(X_test_vgg)

print("Making predictions on the test set using ResNet model...")
y_pred_resnet = svm_resnet.predict(X_test_resnet)

# Evaluate accuracy, precision and recall
def evaluate_model(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    return accuracy, precision, recall

# Evaluate each model
print("Evaluating VGG16 model...")
accuracy_vgg, precision_vgg, recall_vgg = evaluate_model(y_test_vgg, y_pred_vgg_face)
print("Evaluating ResNet model...")
accuracy_resnet, precision_resnet, recall_resnet = evaluate_model(y_test_resnet, y_pred_resnet)

# Print the results
print("\nVGG16 Model Performance:")
print(f"Accuracy: {accuracy_vgg:.4f}")
print(f"Precision: {precision_vgg:.4f}")
print(f"Recall: {recall_vgg:.4f}\n")

print("ResNet Model Performance:")
print(f"Accuracy: {accuracy_resnet:.4f}")
print(f"Precision: {precision_resnet:.4f}")
print(f"Recall: {recall_resnet:.4f}\n")


Loading LFW dataset...
LFW dataset loaded with 7 classes.
Loading VGG16 and ResNet50 models...
Models loaded.
Loaded features from vgg16_features.npy
Loaded features from resnet50_features.npy
Splitting the dataset into training and testing sets...
Training SVM classifier on VGG-Face features...
VGG-Face training progress: 0.97%
VGG-Face training progress: 1.94%
VGG-Face training progress: 2.91%
VGG-Face training progress: 3.88%
VGG-Face training progress: 4.85%
VGG-Face training progress: 5.83%
VGG-Face training progress: 6.80%
VGG-Face training progress: 7.77%
VGG-Face training progress: 8.74%
VGG-Face training progress: 9.71%
VGG-Face training progress: 10.68%
VGG-Face training progress: 11.65%
VGG-Face training progress: 12.62%
VGG-Face training progress: 13.59%
VGG-Face training progress: 14.56%
VGG-Face training progress: 15.53%
VGG-Face training progress: 16.50%
VGG-Face training progress: 17.48%
VGG-Face training progress: 18.45%
VGG-Face training progress: 19.42%
VGG-Face trai