In [3]:
!pip install facenet_pytorch

Collecting facenet_pytorch
  Downloading facenet_pytorch-2.5.3-py3-none-any.whl.metadata (13 kB)
Downloading facenet_pytorch-2.5.3-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m31.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: facenet_pytorch
Successfully installed facenet_pytorch-2.5.3


In [4]:
import cv2
from PIL import Image
import numpy as np
from matplotlib import pyplot as plt
import torch
from torch import nn
from facenet_pytorch import MTCNN
from torch.utils.data import Dataset, DataLoader
from torchvision.utils import save_image
import os
import pandas as pd
from torchvision import transforms as tfs
import random

In [5]:
class MTCNN_Dataset(Dataset):
    """Image dataset driven by a space-separated ``<image name> <label>`` file.

    Two modes are supported:

    * ``'rec'``    - items are ``(image, label)``; the image is opened with PIL
      and passed through ``transform`` when one is given. ``subset`` selects a
      row range of the annotation table.
    * ``'detect'`` - items are ``(image tensor, image name)``; the image is read
      with OpenCV, converted from BGR to RGB and wrapped in a tensor.

    Args:
        imgs_path: Directory that contains the image files.
        labels_path: Path to the annotation file (no header, space separated).
        mode: ``'rec'`` or ``'detect'``.
        subset: ``'train'``, ``'val'``, ``'test'`` or ``'all'``; only used in
            ``'rec'`` mode. ``None`` is treated as ``'all'``.
        transform: Optional callable applied to the PIL image in ``'rec'`` mode.

    Raises:
        ValueError: If ``mode`` or ``subset`` is not one of the accepted values.
    """

    # Row ranges of the annotation table that make up each 'rec' subset.
    _REC_SUBSETS = {
        'train': slice(None, 162770),
        'val': slice(162770, 182637),
        'test': slice(182637, None),
        'all': slice(None),
    }

    def __init__(self, imgs_path, labels_path, mode, subset=None, transform=None):
        super().__init__()
        if mode not in ('rec', 'detect'):
            raise ValueError(f"mode must be 'rec' or 'detect', got {mode!r}")

        self.imgs_folder = imgs_path
        self.labels = pd.read_csv(labels_path, sep=" ", header=None)
        self.transform = transform
        self.mode = mode

        if mode == 'rec':
            subset_key = 'all' if subset is None else subset
            if subset_key not in self._REC_SUBSETS:
                raise ValueError(
                    f"subset must be one of {sorted(self._REC_SUBSETS)}, got {subset!r}"
                )
            # Kept on the instance: __len__ and __getitem__ both need it.
            self._items = self.labels.iloc[self._REC_SUBSETS[subset_key]]
        else:
            self._items = self.labels

    def __len__(self):
        """Number of rows in the active subset (the full table in 'detect' mode)."""
        return len(self._items)

    def __getitem__(self, idx):
        """Return ``(image, label)`` in 'rec' mode or ``(image, name)`` in 'detect' mode."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        if self.mode == 'rec':
            img_name, label = self._items.iloc[idx]
            img = Image.open(os.path.join(self.imgs_folder, img_name))
            if self.transform:
                img = self.transform(img)
            return img, label

        img_name = self._items.iloc[idx, 0]
        img_path = os.path.join(self.imgs_folder, img_name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        return torch.tensor(img), img_name

In [6]:
# Kaggle input locations: the image directory and the identity annotation file.
imgs_path = '/kaggle/input/celeba-dataset/img_align_celeba/img_align_celeba'
labels_path = '/kaggle/input/celeba-identity/identity_CelebA.txt'
# 'detect' mode: no subset is selected, so the dataset spans every annotated row.
full_set = MTCNN_Dataset(imgs_path, labels_path, mode='detect')

In [7]:
# Batches of 128 items in annotation-file order (shuffling disabled).
dataloader = DataLoader(full_set, 128, shuffle=False)

In [None]:
def show_imgs(dataloader, n_imgs):
    """Plot ``n_imgs`` randomly chosen images from the first batch of ``dataloader``.

    The grid has two columns when ``n_imgs`` is even, three when it is an odd
    multiple of three, and a single column otherwise.

    Args:
        dataloader: Iterable whose first item is ``(image batch, _)``.
        n_imgs: Number of images to draw; must be between 1 and the batch size.

    Raises:
        ValueError: If ``n_imgs`` is smaller than 1, or (from ``random.sample``)
            larger than the batch.
    """
    if n_imgs < 1:
        raise ValueError("n_imgs must be at least 1")

    batch, _ = next(iter(dataloader))
    # Sample positions rather than converting the whole batch to Python lists.
    chosen = random.sample(range(len(batch)), n_imgs)

    if n_imgs % 2 == 0:
        n_cols = 2
    elif n_imgs % 3 == 0:
        n_cols = 3
    else:
        n_cols = 1
    n_rows = n_imgs // n_cols

    for plot_pos, batch_idx in enumerate(chosen, start=1):
        plt.subplot(n_rows, n_cols, plot_pos)
        plt.axis('off')
        plt.imshow(np.asarray(batch[batch_idx]))
    plt.subplots_adjust(hspace=0.05, wspace=0.05)
    plt.show()

In [8]:
def align_face(image,
               bbox: list | np.ndarray,
               landmarks: list | np.ndarray,
               image_size: tuple = (160, 160)) -> np.ndarray:
    left_eye = landmarks[0]
    right_eye = landmarks[1]

    # Calculate angle between the eyes
    dy = right_eye[1] - left_eye[1]
    dx = right_eye[0] - left_eye[0]
    angle = np.arctan2(dy, dx) * 180.0 / np.pi

    # Calculate center of eyes
    eyes_center = ((left_eye[0] + right_eye[0]) // 2, (left_eye[1] + right_eye[1]) // 2)

    # Rotate image around the center of the eyes
    rot_matrix = cv2.getRotationMatrix2D(eyes_center, angle, scale=1)
    aligned_face = cv2.warpAffine(image, rot_matrix, (image.shape[1], image.shape[0]), flags=cv2.INTER_LINEAR)
    
    if any(coord < 0 for coord in bbox):
        return None
    
    x1, y1, x2, y2 = bbox
    
    cropped_aligned_face = aligned_face[y1:y2, x1:x2]

    cropped_aligned_face = cv2.resize(cropped_aligned_face, dsize=image_size)

    return torch.tensor(cropped_aligned_face)

In [163]:
class FaceExtractor(nn.Module):
    """Detect one face per image, keep frontal faces and return aligned crops.

    Args:
        detector: Object exposing ``detect(img_batch, landmarks=True)`` that
            returns per-image boxes, probabilities and landmarks.
        device: Value written to ``detector.device``.
        img_size: Output size forwarded to ``align_face``.
    """

    # Faces whose detection probability is not above this are ignored.
    MIN_PROBA = 0.9
    # Half-open [low, high) range, in degrees, of eye angles counted as frontal.
    FRONTAL_ANGLE_RANGE = (30, 66)

    def __init__(self, detector, device='cpu', img_size=(160, 160)):
        super().__init__()
        self.model = detector
        # NOTE(review): only the detector's `device` attribute is overwritten;
        # nothing here moves the detector's weights - confirm both agree.
        self.model.device = device
        self.img_size = img_size

    def get_bbox_landmarks(self, img_batch):
        """Run the detector and keep the first face of every image.

        Returns:
            Dict with keys ``'bbox'``, ``'landmarks'`` and ``'proba'``; each is
            a list with one entry per image, ``None`` where nothing was found.
            Box coordinates are truncated to ``int``.
        """
        bbox_batch, proba_batch, landmarks_batch = self.model.detect(img_batch, landmarks=True)
        results = {'bbox': [],
                   'landmarks': [],
                   'proba': []}
        for bbox, proba, landmarks in zip(bbox_batch, proba_batch, landmarks_batch):
            if bbox is None or proba is None or landmarks is None:
                for entries in results.values():
                    entries.append(None)
                continue
            results['bbox'].append([int(coord) for coord in bbox[0]])
            results['landmarks'].append(landmarks[0])
            results['proba'].append(proba[0])
        return results

    def npAngle(self, a, b, c):
        """Return the angle at vertex ``b`` of the triangle ``a-b-c``, in degrees."""
        ba = np.array(a) - np.array(b)
        bc = np.array(c) - np.array(b)

        cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
        # Rounding can push the ratio marginally outside [-1, 1], where arccos
        # yields NaN; clipping keeps (nearly) collinear points well defined.
        angle = np.arccos(np.clip(cosine_angle, -1.0, 1.0))

        return np.degrees(angle)

    def predFacePose(self, bbox_, landmarks_, prob_):
        """Classify every face as frontal or left/right profile.

        The decision uses the two angles of the triangle formed by landmarks
        0, 1 and 2 at landmarks 1 ("right") and 0 ("left") - presumably the two
        eyes and the nose; confirm against the detector's landmark order.

        Returns:
            Dict with keys ``'angle_right'``, ``'angle_left'`` and ``'label'``.
            Each list has one entry per input face; all three are ``None`` for
            a missing face or one whose probability is not above ``MIN_PROBA``.
        """
        angle_R_List = []
        angle_L_List = []
        predLabelList = []
        low, high = self.FRONTAL_ANGLE_RANGE

        for bbox, landmarks, prob in zip(bbox_, landmarks_, prob_):
            if bbox is None or not (prob > self.MIN_PROBA):
                predLabelList.append(None)
                angle_R_List.append(None)
                angle_L_List.append(None)
                continue

            angR = self.npAngle(landmarks[0], landmarks[1], landmarks[2])
            angL = self.npAngle(landmarks[1], landmarks[0], landmarks[2])
            angle_R_List.append(angR)
            angle_L_List.append(angL)

            # Float comparison matches the former int()-in-range test for the
            # non-negative angles arccos produces, and does not raise on NaN.
            if low <= angR < high and low <= angL < high:
                predLabelList.append('Frontal')
            elif angR < angL:
                predLabelList.append('Left Profile')
            else:
                predLabelList.append('Right Profile')

        face_d = {'angle_right': angle_R_List,
                  'angle_left': angle_L_List,
                  'label': predLabelList}

        return face_d

    def forward(self, img_batch):
        """Return one aligned crop per image, ``None`` for non-frontal or missing faces.

        Returns:
            1-D object array with one entry per image. It is filled element by
            element so the shape stays ``(len(batch),)`` even when every face
            was detected (letting NumPy infer the shape from a list of
            equally sized tensors would expand them into extra dimensions).
        """
        outps = self.get_bbox_landmarks(img_batch)
        bboxes, landmarks_list, probas = outps['bbox'], outps['landmarks'], outps['proba']
        labels = self.predFacePose(bboxes, landmarks_list, probas)['label']

        # np.empty with dtype=object starts out filled with None.
        aligned_faces = np.empty(len(labels), dtype='object')
        for idx, (bbox, landmarks, label) in enumerate(zip(bboxes, landmarks_list, labels)):
            if label == 'Frontal':
                aligned_faces[idx] = align_face(np.array(img_batch[idx]), bbox, landmarks, self.img_size)
        return aligned_faces


In [164]:
# Detector on the first CUDA device. keep_all=False with
# selection_method='probability' presumably keeps only the most confident face
# per image, and `thresholds` look like the per-stage score cut-offs - confirm
# both against the facenet_pytorch documentation.
mtcnn = MTCNN(device='cuda:0', keep_all=False, thresholds=[0.6, 0.8, 0.92], selection_method='probability')
# Wrap the detector; the same device string is written to the detector's `device` attribute.
model = FaceExtractor(mtcnn, device='cuda:0')

In [165]:
def detect_and_save_batch(model, dir_name, dataloader, device='cuda:0'):
    """Run ``model`` over ``dataloader`` and save every returned face crop.

    Args:
        model: Callable module mapping an image batch to one entry per image:
            an HxWxC image (tensor or array, values 0-255) or ``None``.
        dir_name: Output directory; created if missing. Each crop is saved
            under the file name yielded by the dataloader.
        dataloader: Iterable of ``(image batch, file names)``.
        device: Unused; kept so existing calls keep working.

    Returns:
        List of file names for which the model returned ``None``.
    """
    # Create the directory once, and even when nothing gets saved, so later
    # steps that expect it do not fail.
    os.makedirs(dir_name, exist_ok=True)

    bad_imgs = []
    n_saved = 0
    model.eval()
    with torch.inference_mode():
        for batch, filenames in dataloader:
            for img, filename in zip(model(batch), filenames):
                if img is None:
                    bad_imgs.append(filename)
                    continue
                if isinstance(img, np.ndarray):
                    # Crops may arrive as object arrays; normalise to uint8 tensors.
                    img = torch.tensor(img.astype('uint8'))
                # HWC in [0, 255] -> CHW in [0, 1], the layout save_image expects.
                save_image(img.permute(2, 0, 1) / 255, os.path.join(dir_name, filename))
                n_saved += 1

    # One summary line instead of one line per failure, and no claim that
    # every image succeeded.
    print(f"Saved {n_saved} images to {dir_name}; no usable face in {len(bad_imgs)} images.")
    return bad_imgs
            

In [166]:
# Output directory for the cropped faces, and the device string passed to the export call.
root = '/kaggle/working/celeba_cropped_'
device = 'cuda:0'

In [None]:
# Export the face crops for the whole dataset; the return value lists the
# file names for which no crop was produced.
to_drop = detect_and_save_batch(model, root, dataloader, device)

In [168]:
# Reload the identity annotations with named columns so rows can be filtered by image name.
annot = pd.read_csv('/kaggle/input/celeba-identity/identity_CelebA.txt', sep=' ', names=['Image_name', 'Label'])

In [178]:
# Number of images that produced no face crop.
print(len(to_drop))

41100


In [171]:
# Index labels of the annotation rows whose image is listed in `to_drop`.
idx = annot[(annot['Image_name'].isin(to_drop))].index

In [172]:
# Keep only rows whose image was exported. Filtering by name (rather than an
# in-place drop by index label) makes the cell safe to re-run: a second run is
# a no-op instead of a KeyError on labels that were already removed.
annot = annot[~annot['Image_name'].isin(to_drop)]

In [173]:
# Display the filtered annotation table.
annot

Unnamed: 0,Image_name,Label
1,000002.jpg,2937
5,000006.jpg,4153
6,000007.jpg,9040
7,000008.jpg,6369
8,000009.jpg,3332
...,...,...
202594,202595.jpg,9761
202595,202596.jpg,7192
202596,202597.jpg,9852
202597,202598.jpg,5570


In [174]:
# Write the same layout the source annotation file is read with elsewhere in
# this notebook (`sep=" ", header=None`, two columns): no header row and no
# index column, so the file can be read back the same way.
annot.to_csv('Identity_CelebA.txt', sep=' ', index=False, header=False)

In [177]:
import shutil
# Zip the directory of cropped faces into celeba_aligned.zip.
shutil.make_archive('celeba_aligned', 'zip', '/kaggle/working/celeba_cropped_')

'/kaggle/working/celeba_aligned.zip'