In [16]:
import os
import cv2
import torch
import numpy as np
from facenet_pytorch import MTCNN
from tqdm import tqdm

In [17]:
# Setup MTCNN
# Run the detector on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Face detector/cropper: requests 48-pixel square crops with no extra margin.
# post_process is left at the library default, so later cells must account for
# whatever value range the detector returns.
mtcnn = MTCNN(device=device, margin=0, image_size=48)

In [28]:
# Input / output locations
# Dataset root, read later as <root>/<split>/<emotion>/ -- change this to your dataset root.
base_input_dir = './FER-2013'

# Every processed face is written somewhere beneath this folder; make sure it exists.
output_dir = 'processed_faces'
os.makedirs(name=output_dir, exist_ok=True)

In [29]:
# Both dataset splits are processed; their outputs are combined per emotion.
splits = [
    'train',
    'test',
]

# Emotion labels; each one names a sub-folder inside every split.
emotions = [
    'angry',
    'disgust',
    'fear',
    'happy',
    'neutral',
    'sad',
    'surprise',
]

In [30]:
# Crop the detected face out of every input image, convert it to grayscale and
# save it under output_dir/<emotion>/, while counting per emotion what happened
# to each input file.


def _face_to_gray_uint8(face, standardized):
    """Turn an MTCNN face crop into a 2-D uint8 grayscale image.

    Args:
        face: float tensor laid out as (channels, height, width) in RGB order,
            as returned by calling the detector on a single image.
        standardized: True when the detector applied facenet_pytorch's
            fixed standardization, i.e. values are (pixel - 127.5) / 128
            instead of raw 0..255 pixel values.

    Returns:
        numpy.ndarray of dtype uint8 with shape (height, width).
    """
    if standardized:
        # Invert (pixel - 127.5) / 128 to get back to the 0..255 pixel scale.
        # Multiplying the standardized values by 255 (as was done before) yields
        # negative / out-of-range numbers that wrap around in the uint8 cast.
        face = face * 128.0 + 127.5

    pixels = face.detach().cpu().permute(1, 2, 0).numpy()
    # Round instead of truncating and clamp before the cast so no value wraps.
    pixels = np.clip(np.rint(pixels), 0, 255).astype(np.uint8)
    # permute() leaves a strided view; hand OpenCV a contiguous buffer.
    return cv2.cvtColor(np.ascontiguousarray(pixels), cv2.COLOR_RGB2GRAY)


# Stats counters
stats = {emotion: {
    'total': 0,         # files listed in the input folder
    'none_image': 0,    # files OpenCV could not decode
    'no_face': 0,       # images in which the detector found no face
    'success': 0,       # faces actually written to disk
    'write_failed': 0   # faces detected but cv2.imwrite reported failure
} for emotion in emotions}

# MTCNN standardizes its crops when post_process is enabled (the library default),
# so read the setting from the detector instead of assuming a value range.
crops_standardized = getattr(mtcnn, 'post_process', True)

for split in splits:
    for emotion in emotions:
        input_folder = os.path.join(base_input_dir, split, emotion)
        output_subfolder = os.path.join(output_dir, emotion)
        os.makedirs(output_subfolder, exist_ok=True)

        # sorted() makes the processing order reproducible across runs/filesystems.
        for img_name in tqdm(sorted(os.listdir(input_folder)), desc=f"{split}/{emotion}"):
            stats[emotion]['total'] += 1
            img_path = os.path.join(input_folder, img_name)
            image = cv2.imread(img_path)

            # cv2.imread signals an unreadable / non-image file by returning None.
            if image is None:
                stats[emotion]['none_image'] += 1
                continue

            # OpenCV loads BGR; the detector is fed RGB.
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            face = mtcnn(image_rgb)

            if face is None:
                stats[emotion]['no_face'] += 1
                continue

            face_gray = _face_to_gray_uint8(face, crops_standardized)

            # Prefix with the split name so train/test files sharing a name
            # do not overwrite each other in the combined output folder.
            save_path = os.path.join(output_subfolder, f"{split}_{img_name}")
            if cv2.imwrite(save_path, face_gray):
                stats[emotion]['success'] += 1
            else:
                stats[emotion]['write_failed'] += 1

train/angry: 100%|██████████| 3995/3995 [00:24<00:00, 164.57it/s]
train/disgust: 100%|██████████| 436/436 [00:02<00:00, 195.15it/s]
train/fear: 100%|██████████| 4097/4097 [00:21<00:00, 193.49it/s]
train/happy: 100%|██████████| 7215/7215 [00:41<00:00, 172.42it/s]
train/neutral: 100%|██████████| 4965/4965 [00:30<00:00, 163.22it/s]
train/sad: 100%|██████████| 4830/4830 [00:26<00:00, 183.16it/s]
train/surprise: 100%|██████████| 3171/3171 [00:18<00:00, 173.29it/s]
test/angry: 100%|██████████| 958/958 [00:05<00:00, 184.01it/s]
test/disgust: 100%|██████████| 111/111 [00:00<00:00, 195.95it/s]
test/fear: 100%|██████████| 1024/1024 [00:04<00:00, 209.26it/s]
test/happy: 100%|██████████| 1774/1774 [00:10<00:00, 163.94it/s]
test/neutral: 100%|██████████| 1233/1233 [00:08<00:00, 149.54it/s]
test/sad: 100%|██████████| 1247/1247 [00:08<00:00, 148.40it/s]
test/surprise: 100%|██████████| 831/831 [00:04<00:00, 191.09it/s]


In [35]:
# Summary Report
# Prints one fixed-width table row per emotion from the counters in `stats`.
header_cells = [
    f"{'Emotion':<10}",
    f"{'Total':<6}",
    f"{'Success':<8}",
    f"{'Image None':<11}",
    f"{'No Face':<8}",
    f"{'Success %':<10}",
]

print("Summary Report")
print(" | ".join(header_cells))
print("-" * 65)

for emotion in emotions:
    counts = stats[emotion]
    total = counts['total']
    success = counts['success']
    none_image = counts['none_image']
    no_face = counts['no_face']

    # Guard against empty categories so the percentage never divides by zero.
    if total > 0:
        percent = (success / total) * 100
    else:
        percent = 0

    row_cells = [
        f"{emotion:<10}",
        f"{total:<6}",
        f"{success:<8}",
        f"{none_image:<11}",
        f"{no_face:<8}",
        f"{percent:>9.2f}%",
    ]
    print(" | ".join(row_cells))

Summary Report
Emotion    | Total  | Success  | Image None  | No Face  | Success % 
-----------------------------------------------------------------
angry      | 4953   | 4186     | 0           | 767      |     84.51%
disgust    | 547    | 432      | 0           | 115      |     78.98%
fear       | 5121   | 4071     | 0           | 1050     |     79.50%
happy      | 8989   | 7789     | 0           | 1200     |     86.65%
neutral    | 6198   | 5541     | 0           | 657      |     89.40%
sad        | 6077   | 4698     | 0           | 1379     |     77.31%
surprise   | 4002   | 3442     | 0           | 560      |     86.01%
