In [None]:
!pip install -q kagglehub tensorflow keras matplotlib

import kagglehub
import os

path = kagglehub.dataset_download("hemantsoni042/celebrity-images-for-face-recognition")

In [None]:
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import os

# Dataset root: every sub-directory is treated as one celebrity (its name is the label).
image_dir = os.path.join(path, 'celebrities_images')

# os.listdir() returns entries in arbitrary, filesystem-dependent order. Sort
# both levels so the sample order -- and therefore the seeded train/test split
# computed from it -- is identical on every machine and every run.
celebrity_folders = sorted(
    d for d in os.listdir(image_dir) if os.path.isdir(os.path.join(image_dir, d))
)

# Parallel lists: image_paths[i] is a file inside the folder named labels[i].
image_paths = []
labels = []
for celebrity_name in celebrity_folders:
    celebrity_folder_path = os.path.join(image_dir, celebrity_name)
    for image_file in sorted(os.listdir(celebrity_folder_path)):
        image_path = os.path.join(celebrity_folder_path, image_file)
        image_paths.append(image_path)
        labels.append(celebrity_name)

# Alphabetically ordered class names; a name's position is its integer class id.
unique_labels = sorted(set(labels))
label_to_id = {label: i for i, label in enumerate(unique_labels)}

images = []
numerical_labels = []
image_size = (100, 100)
skipped_paths = []  # files cv2 could not decode (non-images, corrupt files, ...)

for image_path, label in zip(image_paths, labels):
    # cv2.imread returns None instead of raising when a file cannot be decoded.
    # Decoded pixels are in BGR channel order and are kept that way.
    img = cv2.imread(image_path)
    if img is None:
        skipped_paths.append(image_path)
        continue
    img = cv2.resize(img, image_size)
    img = img / 255.0  # scale pixel values to [0, 1]
    images.append(img)
    numerical_labels.append(label_to_id[label])

# Make dropped files visible instead of silently shrinking the dataset.
if skipped_paths:
    print(f"Skipped {len(skipped_paths)} file(s) that could not be read as images")

# Fail here with a clear message rather than later inside the train/test split.
if not images:
    raise ValueError(f"No readable images found under {image_dir}")

images = np.array(images)
numerical_labels = np.array(numerical_labels)

# Hold out 20% of the samples; random_state pins the shuffle used for the split.
X_train, X_test, y_train, y_test = train_test_split(
    images,
    numerical_labels,
    test_size=0.2,
    random_state=42,
)

# One-hot encode the integer class ids of both partitions with the same class count.
y_train_one_hot, y_test_one_hot = (
    to_categorical(class_ids, num_classes=len(unique_labels))
    for class_ids in (y_train, y_test)
)

# Summary of what was loaded and how it was partitioned.
print(f"Number of images loaded: {len(images)}")
print(f"Shape of images array: {images.shape}")
print(f"Shape of numerical labels array: {numerical_labels.shape}")
print(f"Number of unique celebrities: {len(unique_labels)}")
print(f"Shape of X_train: {X_train.shape}")
print(f"Shape of X_test: {X_test.shape}")
print(f"Shape of y_train: {y_train.shape}")
print(f"Shape of y_test: {y_test.shape}")
print(f"Shape of y_train_one_hot: {y_train_one_hot.shape}")
print(f"Shape of y_test_one_hot: {y_test_one_hot.shape}")

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Flatten, Input, MaxPooling2D

# One output unit per celebrity.
num_classes = len(unique_labels)

# Three conv/pool stages followed by a small dense classifier head.
model = Sequential([
    # Declare the input with an explicit Input layer; passing `input_shape=` to
    # the first layer is deprecated in Keras 3 and emits a warning.
    # image_size is handed to cv2.resize as (width, height), so the resulting
    # arrays are (height, width, 3) == (image_size[1], image_size[0], 3).
    Input(shape=(image_size[1], image_size[0], 3)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    # Softmax over the classes, matching the one-hot targets.
    Dense(num_classes, activation='softmax')
])

model.summary()

In [None]:
# Multi-class setup: softmax outputs scored against one-hot targets.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 10 epochs; the held-out split is evaluated after every epoch
# and the per-epoch metrics are kept in `history`.
history = model.fit(
    X_train,
    y_train_one_hot,
    validation_data=(X_test, y_test_one_hot),
    epochs=10,
)

In [None]:
import cv2
import matplotlib.pyplot as plt

# Frontal-face Haar cascade loaded from the cascade directory exposed by cv2.data.
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

# CascadeClassifier does not raise when the XML file cannot be loaded; it just
# stays empty and fails later inside detectMultiScale. Fail early and clearly.
if face_cascade.empty():
    raise RuntimeError("Failed to load Haar cascade 'haarcascade_frontalface_default.xml'")

def recognize_face(image_path):
    """Detect faces in an image file and label each with the model's prediction.

    Every face found by the module-level ``face_cascade`` is cropped, resized
    to ``image_size``, scaled to [0, 1] and classified with the module-level
    ``model``. A rectangle and the predicted name from ``unique_labels`` are
    drawn on a copy of the image.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        A copy of the image (BGR, as loaded by OpenCV) with one box and one
        label per detected face, unchanged if no face was detected, or
        ``None`` if the file could not be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error: Could not load image from {image_path}")
        return None

    # The cascade runs on a grayscale version of the image.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.1, 4)

    # Draw on a copy so crops are always taken from unannotated pixels.
    img_with_detections = img.copy()

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.9
    text_thickness = 2

    for (x, y, w, h) in faces:
        # Same preprocessing as the training images: resize, scale to [0, 1],
        # then add a leading batch axis for model.predict.
        face_img = img[y:y+h, x:x+w]
        face_img = cv2.resize(face_img, image_size)
        face_img = face_img / 255.0
        face_img = np.expand_dims(face_img, axis=0)

        predictions = model.predict(face_img)
        predicted_class_id = int(np.argmax(predictions))
        predicted_label = unique_labels[predicted_class_id]

        cv2.rectangle(img_with_detections, (x, y), (x+w, y+h), (255, 0, 0), 1)

        # Default label position is just above the box. For a face close to
        # the top edge that baseline would fall outside the image and the text
        # would be clipped, so place it just inside the top of the box instead.
        (_, text_height), _ = cv2.getTextSize(predicted_label, font, font_scale, text_thickness)
        label_y = int(y) - 10
        if label_y < text_height:
            label_y = int(y) + text_height + 10
        cv2.putText(img_with_detections, predicted_label, (int(x), label_y),
                    font, font_scale, (255, 0, 0), text_thickness)

    return img_with_detections


In [None]:
# Which held-out sample to run through the detection + recognition pipeline.
# This name was never assigned anywhere, so the cell failed with NameError on
# a fresh kernel; change the value to inspect a different test image.
sample_image_index = 0

# Undo the [0, 1] scaling. Round before casting: a plain uint8 cast truncates,
# so floating-point error could turn a value such as 199.99999 into 199.
sample_image_array = np.rint(X_test[sample_image_index] * 255).astype(np.uint8)
sample_image_label_id = y_test[sample_image_index]
sample_image_label = unique_labels[sample_image_label_id]

temp_image_path = "temp_test_image.jpg"
try:
    # X_test was loaded with cv2.imread and is therefore already BGR, which is
    # what cv2.imwrite expects. Converting RGB->BGR here would swap the red and
    # blue channels of the image handed to the model.
    if not cv2.imwrite(temp_image_path, sample_image_array):
        raise IOError(f"Could not write temporary image to {temp_image_path}")
    recognized_image = recognize_face(temp_image_path)
finally:
    # Remove the temporary file whether or not recognition succeeded.
    if os.path.exists(temp_image_path):
        os.remove(temp_image_path)

if recognized_image is not None:
    plt.figure(figsize=(16, 16))
    # matplotlib expects RGB; the annotated image is BGR.
    plt.imshow(cv2.cvtColor(recognized_image, cv2.COLOR_BGR2RGB))
    plt.title(f"Detected Faces and Recognized Identities\n(Original Label: {sample_image_label})")
    plt.axis('off')
    plt.show()
