# Face detection and recognition training pipeline

The following example shows how to fine-tune an InceptionResnetV1 model on your own dataset. It mostly follows standard PyTorch training patterns.

In [3]:
from facenet_pytorch import MTCNN, InceptionResnetV1, fixed_image_standardization, training
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
import numpy as np
import os
from PIL import Image  # Add this line


# Run configuration: raw image folder, batch size and number of training epochs.
data_dir = '../data/test_images'
batch_size = 32
epochs = 34

# Worker processes handed to the DataLoaders: none on Windows ('nt'), eight otherwise.
if os.name == 'nt':
    workers = 0
else:
    workers = 8

# Use the first CUDA device when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('Running on device: {}'.format(device))

# Face detector used by the cropping step.
mtcnn = MTCNN(
    image_size=160,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    post_process=True,
    device=device,
)

# Define function to save cropped faces
def save_cropped_faces(image_paths, save_paths):
    """Detect faces in each image and save every detected face as its own crop.

    Args:
        image_paths: Iterable of paths to the source images.
        save_paths: Iterable of destination paths, paired one-to-one with
            ``image_paths``. Face ``i`` of an image is written as
            ``<destination stem>_face_<i><destination extension>``.

    Images in which the module-level ``mtcnn`` detector reports no boxes are
    skipped, as are boxes that lie entirely outside the image.
    """
    for image_path, save_path in zip(image_paths, save_paths):
        # Close the file promptly and normalise to 3-channel RGB so grayscale,
        # palette and RGBA files all reach the detector in the same layout.
        with Image.open(image_path) as img:
            image = np.array(img.convert('RGB'))

        boxes, _ = mtcnn.detect(image)
        if boxes is None:
            continue

        height, width = image.shape[:2]
        # splitext keeps whatever extension the source had; a '.jpg'-only
        # replace would leave other extensions unchanged and make every face
        # of that image overwrite the same file.
        stem, ext = os.path.splitext(save_path)
        out_dir = os.path.dirname(save_path)

        for i, box in enumerate(boxes):
            x1, y1, x2, y2 = [int(coord) for coord in box]
            # Clamp to the image: a negative start index would otherwise wrap
            # around in NumPy slicing and yield a wrong or empty crop.
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(x2, width), min(y2, height)
            if x2 <= x1 or y2 <= y1:
                continue

            # Create the folder only when a crop is really written, so no empty
            # class directory is left behind. '' means the current directory.
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
            face_image = image[y1:y2, x1:x2]
            Image.fromarray(face_image).save(f'{stem}_face_{i}{ext}')

# Index the raw images by class folder; this step only uses the file paths.
dataset = datasets.ImageFolder(
    data_dir,
    transform=transforms.Resize((512, 512)),
)

# Swap each (path, label) sample for (source path, destination path), where the
# destination is the same path with data_dir replaced by data_dir + '_cropped'.
dataset.samples = list(map(
    lambda sample: (sample[0], sample[0].replace(data_dir, data_dir + '_cropped')),
    dataset.samples,
))

# Unzip the pairs into parallel tuples and write the face crops to disk.
image_paths, save_paths = zip(*dataset.samples)
save_cropped_faces(image_paths, save_paths)

# Build the training dataset from the cropped faces first, so that everything
# below (classifier size, split indices) is derived from the images that are
# actually trained on rather than from the raw image folder.
trans = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    fixed_image_standardization
])
dataset = datasets.ImageFolder(data_dir + '_cropped', transform=trans)

# Fixed seed so the train/validation split, the sampler order and any randomly
# initialised model weights are the same on every fresh run of the notebook.
seed = 0
torch.manual_seed(seed)

# Shuffle the sample indices once; the first 80% train, the rest validate.
img_inds = np.random.default_rng(seed).permutation(len(dataset))
n_train = int(0.8 * len(img_inds))
train_inds = img_inds[:n_train]
val_inds = img_inds[n_train:]

# Define Inception Resnet V1 module. num_classes comes from the cropped
# dataset: a class folder in which no face was detected has no crops and
# therefore must not get an output unit.
resnet = InceptionResnetV1(
    classify=True,
    pretrained='vggface2',
    num_classes=len(dataset.class_to_idx)
).to(device)

# Define optimizer and scheduler. MultiStepLR multiplies the learning rate by
# gamma each time its step count reaches a milestone.
# NOTE(review): assumes training.pass_epoch steps the scheduler once per epoch - confirm.
optimizer = optim.Adam(resnet.parameters(), lr=0.001)
scheduler = MultiStepLR(optimizer, milestones=[5, 10], gamma=0.1)

# Both loaders read the same dataset; the samplers restrict each one to its
# own disjoint set of indices.
train_loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(train_inds)
)
val_loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(val_inds)
)

# Define loss and evaluation functions
loss_fn = torch.nn.CrossEntropyLoss()
metrics = {
    'fps': training.BatchTimer(),
    'acc': training.accuracy
}

# Train model. The writer carries two extra attributes that are handed to
# training.pass_epoch together with it: a running iteration counter and a
# logging interval.
writer = SummaryWriter()
writer.iteration, writer.interval = 0, 10

try:
    # Baseline validation pass before any weight update.
    print('\n\nInitial')
    print('-' * 10)
    resnet.eval()
    # No optimizer is passed for validation, so nothing is backpropagated;
    # no_grad avoids recording autograd history for these forward passes.
    with torch.no_grad():
        training.pass_epoch(
            resnet, loss_fn, val_loader,
            batch_metrics=metrics, show_running=True, device=device,
            writer=writer
        )

    for epoch in range(epochs):
        print('\nEpoch {}/{}'.format(epoch + 1, epochs))
        print('-' * 10)

        # Training pass: optimizer and scheduler are supplied.
        resnet.train()
        training.pass_epoch(
            resnet, loss_fn, train_loader, optimizer, scheduler,
            batch_metrics=metrics, show_running=True, device=device,
            writer=writer
        )

        # Validation pass on the held-out indices.
        resnet.eval()
        with torch.no_grad():
            training.pass_epoch(
                resnet, loss_fn, val_loader,
                batch_metrics=metrics, show_running=True, device=device,
                writer=writer
            )
finally:
    # Close the event file even when a pass raises or the run is interrupted.
    writer.close()


Running on device: cpu


Initial
----------
Valid |     1/1    | loss:    3.0536 | fps:    8.0540 | acc:    0.0000   

Epoch 1/34
----------
Train |     1/1    | loss:    2.9169 | fps:    5.6832 | acc:    0.0833   
Valid |     1/1    | loss:    2.8914 | fps:    7.6140 | acc:    0.0000   

Epoch 2/34
----------
Train |     1/1    | loss:    0.7372 | fps:    7.1679 | acc:    1.0000   
Valid |     1/1    | loss:    2.9622 | fps:   14.8514 | acc:    0.0000   

Epoch 3/34
----------
Train |     1/1    | loss:    0.1935 | fps:    7.4368 | acc:    1.0000   
Valid |     1/1    | loss:    3.2824 | fps:   16.6660 | acc:    0.0000   

Epoch 4/34
----------
Train |     1/1    | loss:    0.0574 | fps:    6.9039 | acc:    1.0000   
Valid |     1/1    | loss:    4.2879 | fps:   18.1614 | acc:    0.0000   

Epoch 5/34
----------
Train |     1/1    | loss:    0.0226 | fps:    7.5566 | acc:    1.0000   
Valid |     1/1    | loss:    5.3274 | fps:   17.8565 | acc:    0.0000   

Epoch 6/34
----------
Trai

In [5]:
from facenet_pytorch import MTCNN, InceptionResnetV1, fixed_image_standardization, training
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
import numpy as np
import os
from PIL import Image  # Add this line


# Run configuration: raw image folder, batch size and number of training epochs.
data_dir = '../data/test_images'
batch_size = 32
epochs = 34

# Worker processes handed to the DataLoaders: none on Windows ('nt'), eight otherwise.
if os.name == 'nt':
    workers = 0
else:
    workers = 8

# Use the first CUDA device when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('Running on device: {}'.format(device))

# Face detector used by the cropping step.
mtcnn = MTCNN(
    image_size=160,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    post_process=True,
    device=device,
)

# Define function to save cropped faces
def save_cropped_faces(image_paths, save_paths):
    """Detect faces in each image and save every detected face as its own crop.

    Args:
        image_paths: Iterable of paths to the source images.
        save_paths: Iterable of destination paths, paired one-to-one with
            ``image_paths``. Face ``i`` of an image is written as
            ``<destination stem>_face_<i><destination extension>``.

    Images in which the module-level ``mtcnn`` detector reports no boxes are
    skipped, as are boxes that lie entirely outside the image.
    """
    for image_path, save_path in zip(image_paths, save_paths):
        # Close the file promptly and normalise to 3-channel RGB so grayscale,
        # palette and RGBA files all reach the detector in the same layout.
        with Image.open(image_path) as img:
            image = np.array(img.convert('RGB'))

        boxes, _ = mtcnn.detect(image)
        if boxes is None:
            continue

        height, width = image.shape[:2]
        # splitext keeps whatever extension the source had; a '.jpg'-only
        # replace would leave other extensions unchanged and make every face
        # of that image overwrite the same file.
        stem, ext = os.path.splitext(save_path)
        out_dir = os.path.dirname(save_path)

        for i, box in enumerate(boxes):
            x1, y1, x2, y2 = [int(coord) for coord in box]
            # Clamp to the image: a negative start index would otherwise wrap
            # around in NumPy slicing and yield a wrong or empty crop.
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(x2, width), min(y2, height)
            if x2 <= x1 or y2 <= y1:
                continue

            # Create the folder only when a crop is really written, so no empty
            # class directory is left behind. '' means the current directory.
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
            face_image = image[y1:y2, x1:x2]
            Image.fromarray(face_image).save(f'{stem}_face_{i}{ext}')

# Index the raw images by class folder; this step only uses the file paths.
dataset = datasets.ImageFolder(
    data_dir,
    transform=transforms.Resize((512, 512)),
)

# Swap each (path, label) sample for (source path, destination path), where the
# destination is the same path with data_dir replaced by data_dir + '_cropped'.
dataset.samples = list(map(
    lambda sample: (sample[0], sample[0].replace(data_dir, data_dir + '_cropped')),
    dataset.samples,
))

# Unzip the pairs into parallel tuples and write the face crops to disk.
image_paths, save_paths = zip(*dataset.samples)
save_cropped_faces(image_paths, save_paths)

# Build the training dataset from the cropped faces first, so that everything
# below (classifier size, split indices) is derived from the images that are
# actually trained on rather than from the raw image folder.
trans = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    fixed_image_standardization
])
dataset = datasets.ImageFolder(data_dir + '_cropped', transform=trans)

# Fixed seed so the train/validation split, the sampler order and any randomly
# initialised model weights are the same on every fresh run of the notebook.
seed = 0
torch.manual_seed(seed)

# Shuffle the sample indices once; the first 80% train, the rest validate.
img_inds = np.random.default_rng(seed).permutation(len(dataset))
n_train = int(0.8 * len(img_inds))
train_inds = img_inds[:n_train]
val_inds = img_inds[n_train:]

# Define Inception Resnet V1 module. num_classes comes from the cropped
# dataset: a class folder in which no face was detected has no crops and
# therefore must not get an output unit.
resnet = InceptionResnetV1(
    classify=True,
    pretrained='vggface2',
    num_classes=len(dataset.class_to_idx)
).to(device)

# Define optimizer and scheduler. MultiStepLR multiplies the learning rate by
# gamma each time its step count reaches a milestone.
# NOTE(review): assumes training.pass_epoch steps the scheduler once per epoch - confirm.
optimizer = optim.Adam(resnet.parameters(), lr=0.001)
scheduler = MultiStepLR(optimizer, milestones=[5, 10], gamma=0.1)

# Both loaders read the same dataset; the samplers restrict each one to its
# own disjoint set of indices.
train_loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(train_inds)
)
val_loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(val_inds)
)

# Define loss and evaluation functions
loss_fn = torch.nn.CrossEntropyLoss()
metrics = {
    'fps': training.BatchTimer(),
    'acc': training.accuracy
}

# Train model. The writer carries two extra attributes that are handed to
# training.pass_epoch together with it: a running iteration counter and a
# logging interval.
writer = SummaryWriter()
writer.iteration, writer.interval = 0, 10

try:
    # Baseline validation pass before any weight update.
    print('\n\nInitial')
    print('-' * 10)
    resnet.eval()
    # No optimizer is passed for validation, so nothing is backpropagated;
    # no_grad avoids recording autograd history for these forward passes.
    with torch.no_grad():
        training.pass_epoch(
            resnet, loss_fn, val_loader,
            batch_metrics=metrics, show_running=True, device=device,
            writer=writer
        )

    for epoch in range(epochs):
        print('\nEpoch {}/{}'.format(epoch + 1, epochs))
        print('-' * 10)

        # Training pass: optimizer and scheduler are supplied.
        resnet.train()
        training.pass_epoch(
            resnet, loss_fn, train_loader, optimizer, scheduler,
            batch_metrics=metrics, show_running=True, device=device,
            writer=writer
        )

        # Validation pass on the held-out indices.
        resnet.eval()
        with torch.no_grad():
            training.pass_epoch(
                resnet, loss_fn, val_loader,
                batch_metrics=metrics, show_running=True, device=device,
                writer=writer
            )
finally:
    # Close the event file even when a pass raises or the run is interrupted.
    writer.close()

import torch
from facenet_pytorch import MTCNN, InceptionResnetV1
from PIL import Image
import os

# Select the compute device
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Load the MTCNN face detector and the pretrained InceptionResnetV1 embedder
mtcnn = MTCNN(image_size=160, margin=0, min_face_size=20, device=device)
resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

# Folder that holds one sub-folder of photos per student. Kept relative to the
# notebook (same location as the training cells' data_dir) instead of an
# absolute path that only exists on one machine.
student_images_folder = os.path.join('..', 'data', 'test_images')

# Build one embedding per student
for student_name in os.listdir(student_images_folder):
    student_folder = os.path.join(student_images_folder, student_name)

    # Only sub-folders represent students; skip stray files
    if not os.path.isdir(student_folder):
        continue

    embeddings_list = []

    # Visit every image in the student's folder
    for image_name in os.listdir(student_folder):
        image_path = os.path.join(student_folder, image_name)

        try:
            # Close the file promptly and force 3-channel RGB before detection
            with Image.open(image_path) as img:
                # MTCNN crops the face; it yields None when no face is found
                face = mtcnn(img.convert('RGB'))
        except OSError as err:
            # Unreadable or non-image entries should not abort the whole run
            print(f"Skipping {image_path}: {err}")
            continue

        if face is None:
            continue

        # Inference only: no autograd history is needed for the embedding
        with torch.no_grad():
            face_embedding = resnet(face.unsqueeze(0).to(device))

        # Keep the embedding on the CPU so the saved file loads on any machine
        embeddings_list.append(face_embedding.cpu())

    # Average the embeddings when the student has several usable photos
    if embeddings_list:
        student_embedding = torch.mean(torch.stack(embeddings_list), dim=0)

        # Save the embedding to a .pt file in the current working directory
        torch.save(student_embedding, f"{student_name}_embedding.pt")
        print(f"Saved embedding for {student_name}")
    else:
        print(f"No face detected for {student_name}")


Running on device: cpu


Initial
----------
Valid |     1/1    | loss:    2.9169 | fps:   18.1266 | acc:    0.0833   

Epoch 1/34
----------
Train |     2/2    | loss:    2.7050 | fps:    7.4555 | acc:    0.2031   
Valid |     1/1    | loss:    2.9714 | fps:   26.1434 | acc:    0.0833   

Epoch 2/34
----------
Train |     2/2    | loss:    1.0135 | fps:    7.3639 | acc:    0.7656   
Valid |     1/1    | loss:    3.4542 | fps:   20.7610 | acc:    0.0833   

Epoch 3/34
----------
Train |     2/2    | loss:    0.4184 | fps:    6.7469 | acc:    0.8750   
Valid |     1/1    | loss:    3.6785 | fps:   22.0116 | acc:    0.0833   

Epoch 4/34
----------
Train |     2/2    | loss:    0.3758 | fps:    7.4994 | acc:    0.8438   
Valid |     1/1    | loss:    4.6528 | fps:   19.3548 | acc:    0.0833   

Epoch 5/34
----------
Train |     2/2    | loss:    0.1898 | fps:    7.4529 | acc:    0.9688   
Valid |     1/1    | loss:    7.0755 | fps:   21.0140 | acc:    0.0833   

Epoch 6/34
----------
Trai

In [7]:
from facenet_pytorch import MTCNN, InceptionResnetV1, fixed_image_standardization, training
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
import numpy as np
import os
from PIL import Image  # Add this line


# Run configuration: raw image folder, batch size and number of training epochs.
data_dir = '../data/test_images'
batch_size = 32
epochs = 34

# Worker processes handed to the DataLoaders: none on Windows ('nt'), eight otherwise.
if os.name == 'nt':
    workers = 0
else:
    workers = 8

# Use the first CUDA device when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('Running on device: {}'.format(device))

# Face detector used by the cropping step.
mtcnn = MTCNN(
    image_size=160,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    post_process=True,
    device=device,
)

# Define function to save cropped faces
def save_cropped_faces(image_paths, save_paths):
    """Detect faces in each image and save every detected face as its own crop.

    Args:
        image_paths: Iterable of paths to the source images.
        save_paths: Iterable of destination paths, paired one-to-one with
            ``image_paths``. Face ``i`` of an image is written as
            ``<destination stem>_face_<i><destination extension>``.

    Images in which the module-level ``mtcnn`` detector reports no boxes are
    skipped, as are boxes that lie entirely outside the image.
    """
    for image_path, save_path in zip(image_paths, save_paths):
        # Close the file promptly and normalise to 3-channel RGB so grayscale,
        # palette and RGBA files all reach the detector in the same layout.
        with Image.open(image_path) as img:
            image = np.array(img.convert('RGB'))

        boxes, _ = mtcnn.detect(image)
        if boxes is None:
            continue

        height, width = image.shape[:2]
        # splitext keeps whatever extension the source had; a '.jpg'-only
        # replace would leave other extensions unchanged and make every face
        # of that image overwrite the same file.
        stem, ext = os.path.splitext(save_path)
        out_dir = os.path.dirname(save_path)

        for i, box in enumerate(boxes):
            x1, y1, x2, y2 = [int(coord) for coord in box]
            # Clamp to the image: a negative start index would otherwise wrap
            # around in NumPy slicing and yield a wrong or empty crop.
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(x2, width), min(y2, height)
            if x2 <= x1 or y2 <= y1:
                continue

            # Create the folder only when a crop is really written, so no empty
            # class directory is left behind. '' means the current directory.
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
            face_image = image[y1:y2, x1:x2]
            Image.fromarray(face_image).save(f'{stem}_face_{i}{ext}')

# Index the raw images by class folder; this step only uses the file paths.
dataset = datasets.ImageFolder(
    data_dir,
    transform=transforms.Resize((512, 512)),
)

# Swap each (path, label) sample for (source path, destination path), where the
# destination is the same path with data_dir replaced by data_dir + '_cropped'.
dataset.samples = list(map(
    lambda sample: (sample[0], sample[0].replace(data_dir, data_dir + '_cropped')),
    dataset.samples,
))

# Unzip the pairs into parallel tuples and write the face crops to disk.
image_paths, save_paths = zip(*dataset.samples)
save_cropped_faces(image_paths, save_paths)

# Build the training dataset from the cropped faces first, so that everything
# below (classifier size, split indices) is derived from the images that are
# actually trained on rather than from the raw image folder.
trans = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    fixed_image_standardization
])
dataset = datasets.ImageFolder(data_dir + '_cropped', transform=trans)

# Fixed seed so the train/validation split, the sampler order and any randomly
# initialised model weights are the same on every fresh run of the notebook.
seed = 0
torch.manual_seed(seed)

# Shuffle the sample indices once; the first 80% train, the rest validate.
img_inds = np.random.default_rng(seed).permutation(len(dataset))
n_train = int(0.8 * len(img_inds))
train_inds = img_inds[:n_train]
val_inds = img_inds[n_train:]

# Define Inception Resnet V1 module. num_classes comes from the cropped
# dataset: a class folder in which no face was detected has no crops and
# therefore must not get an output unit.
resnet = InceptionResnetV1(
    classify=True,
    pretrained='vggface2',
    num_classes=len(dataset.class_to_idx)
).to(device)

# Define optimizer and scheduler. MultiStepLR multiplies the learning rate by
# gamma each time its step count reaches a milestone.
# NOTE(review): assumes training.pass_epoch steps the scheduler once per epoch - confirm.
optimizer = optim.Adam(resnet.parameters(), lr=0.001)
scheduler = MultiStepLR(optimizer, milestones=[5, 10], gamma=0.1)

# Both loaders read the same dataset; the samplers restrict each one to its
# own disjoint set of indices.
train_loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(train_inds)
)
val_loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(val_inds)
)

# Define loss and evaluation functions
loss_fn = torch.nn.CrossEntropyLoss()
metrics = {
    'fps': training.BatchTimer(),
    'acc': training.accuracy
}

# Train model. The writer carries two extra attributes that are handed to
# training.pass_epoch together with it: a running iteration counter and a
# logging interval.
writer = SummaryWriter()
writer.iteration, writer.interval = 0, 10

try:
    # Baseline validation pass before any weight update.
    print('\n\nInitial')
    print('-' * 10)
    resnet.eval()
    # No optimizer is passed for validation, so nothing is backpropagated;
    # no_grad avoids recording autograd history for these forward passes.
    with torch.no_grad():
        training.pass_epoch(
            resnet, loss_fn, val_loader,
            batch_metrics=metrics, show_running=True, device=device,
            writer=writer
        )

    for epoch in range(epochs):
        print('\nEpoch {}/{}'.format(epoch + 1, epochs))
        print('-' * 10)

        # Training pass: optimizer and scheduler are supplied.
        resnet.train()
        training.pass_epoch(
            resnet, loss_fn, train_loader, optimizer, scheduler,
            batch_metrics=metrics, show_running=True, device=device,
            writer=writer
        )

        # Validation pass on the held-out indices.
        resnet.eval()
        with torch.no_grad():
            training.pass_epoch(
                resnet, loss_fn, val_loader,
                batch_metrics=metrics, show_running=True, device=device,
                writer=writer
            )
finally:
    # Close the event file even when a pass raises or the run is interrupted.
    writer.close()

from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
import numpy as np
import pandas as pd
import os

# DataLoader worker processes: none on Windows ('nt'), four otherwise.
if os.name == 'nt':
    workers = 0
else:
    workers = 4

# Use the first CUDA device when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('Running on device: {}'.format(device))

# Face detector (MTCNN).
mtcnn = MTCNN(
    image_size=160,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    post_process=True,
    device=device,
)

# Face embedder (InceptionResnetV1 with vggface2 weights), in eval mode on the chosen device.
resnet = InceptionResnetV1(pretrained='vggface2')
resnet = resnet.eval().to(device)

# Custom collate function for the dataloader
def collate_fn(x):
    """Return the first element of ``x`` unchanged.

    Intended as a DataLoader ``collate_fn``: instead of stacking samples into
    tensors, the first sample of the batch is handed back exactly as the
    dataset produced it.
    """
    first_sample = x[0]
    return first_sample

# Index the images by class folder and add the reverse lookup (index -> class name).
dataset = datasets.ImageFolder('../data/test_images')
dataset.idx_to_class = dict(
    (index, class_name) for class_name, index in dataset.class_to_idx.items()
)

# collate_fn hands each sample through as produced by the dataset.
loader = DataLoader(dataset, num_workers=workers, collate_fn=collate_fn)

# Parallel lists: cropped face tensors and the class name of each.
aligned, names = [], []

# Run the detector on every image; keep only those in which a face was found.
for x, y in loader:
    x_aligned, prob = mtcnn(x, return_prob=True)
    if x_aligned is None:
        continue
    print('Face detected with probability: {:.8f}'.format(prob))
    aligned.append(x_aligned)
    names.append(dataset.idx_to_class[y])

# Embed the detected faces, save one embedding per name and print distances
if len(aligned) > 0:
    aligned = torch.stack(aligned).to(device)

    # Inference only: skip autograd bookkeeping, then move results to the CPU
    with torch.no_grad():
        embeddings = resnet(aligned).cpu()

    # Group embeddings by name first. A plain {name: embedding} comprehension
    # would silently keep only the last image of any class that has several.
    embeddings_by_name = {}
    for name, embedding in zip(names, embeddings):
        embeddings_by_name.setdefault(name, []).append(embedding)

    # One vector per name: the mean of that name's embeddings (for a name with
    # a single image this is the embedding itself).
    embeddings_dict = {
        name: torch.stack(group).mean(dim=0)
        for name, group in embeddings_by_name.items()
    }

    # Save the dictionary to a .pt file
    torch.save(embeddings_dict, 'embeddings_with_names.pt')
    print("Embeddings with names saved to 'embeddings_with_names.pt'")

    # Pairwise Euclidean distances between the per-image embeddings
    dists = [[(e1 - e2).norm().item() for e2 in embeddings] for e1 in embeddings]

    # Print the distance matrix as a DataFrame
    print(pd.DataFrame(dists, columns=names, index=names))
else:
    print("No faces detected.")



Running on device: cpu


Initial
----------
Valid |     1/1    | loss:    2.7771 | fps:   19.6720 | acc:    0.0833   

Epoch 1/34
----------
Train |     2/2    | loss:    2.7256 | fps:    7.3113 | acc:    0.1406   
Valid |     1/1    | loss:    2.9235 | fps:   24.0964 | acc:    0.0833   

Epoch 2/34
----------
Train |     2/2    | loss:    1.2506 | fps:    7.9141 | acc:    0.6094   
Valid |     1/1    | loss:    2.9246 | fps:   22.2220 | acc:    0.0000   

Epoch 3/34
----------
Train |     2/2    | loss:    0.6308 | fps:    8.0880 | acc:    0.8750   
Valid |     1/1    | loss:    3.1111 | fps:   23.8563 | acc:    0.0000   

Epoch 4/34
----------
Train |     2/2    | loss:    0.4043 | fps:    7.9911 | acc:    0.8750   
Valid |     1/1    | loss:    3.5617 | fps:   22.9673 | acc:    0.0000   

Epoch 5/34
----------
Train |     2/2    | loss:    0.2439 | fps:    8.0181 | acc:    0.9219   
Valid |     1/1    | loss:    3.9524 | fps:   23.2107 | acc:    0.0000   

Epoch 6/34
----------
Trai