<a href="https://colab.research.google.com/github/viniciusGuerras/facial_recognition/blob/main/face_net.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Importações e processamentos

Imports e Downloads

In [None]:
# Install the ChromaDB client; --quiet suppresses most of pip's output.
!pip install chromadb --quiet
# Install pytorch-metric-learning, the source of the `losses` and `miners`
# modules imported in the next cell.
!pip install pytorch-metric-learning



In [None]:
from pytorch_metric_learning import losses, miners
from torch.utils.data import DataLoader, Dataset
from chromadb.utils import embedding_functions
from torchvision import datasets, transforms
from torch.nn import TripletMarginLoss
from google.colab import drive
import torch.nn.functional as F
from PIL import Image
import torch.nn as nn
import numpy as np
import chromadb
import random
import torch
import os

In [None]:
client = chromadb.Client()
drive.mount('/content/gdrive')
!unzip "/content/gdrive/MyDrive/post-processed.zip" -d "/content/data"

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
Archive:  /content/gdrive/MyDrive/post-processed.zip
replace /content/data/post-processed/AJ_Lamas/AJ_Lamas_0001_0000.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: N


Classe auxiliar que devolve, além da imagem e do rótulo, o caminho ("path") de cada imagem do dataset:

In [None]:
class PathImageFolder(Dataset):
    """Dataset wrapper that yields the image file path along with each sample.

    Args:
        image_folder_dataset: Object exposing a ``samples`` sequence of
            ``(image_path, label)`` pairs (presumably a torchvision
            ``ImageFolder`` -- confirm against the caller).
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, image_folder_dataset, transform=None):
        self.dataset = image_folder_dataset
        self.samples = self.dataset.samples
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(image, image_path, label)`` for the sample at *index*."""
        img_path, label = self.samples[index]

        # The context manager releases the file handle deterministically;
        # convert() returns a fully loaded copy that outlives the handle.
        with Image.open(img_path) as source:
            img = source.convert("RGB")
        # Compare against None so a callable that happens to be falsy
        # (e.g. an empty container-like transform) is still applied.
        if self.transform is not None:
            img = self.transform(img)

        return img, img_path, label

    def __len__(self):
        """Return the number of ``(image_path, label)`` samples."""
        return len(self.samples)

Carrega o Dataset:

In [None]:
# Training-time preprocessing: resize to 160x160, apply random augmentations
# (flip, rotation, crop, colour jitter, blur), then convert to a tensor and
# normalize each channel with the given mean/std.
transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=10),
    transforms.RandomResizedCrop(160, scale=(0.9, 1.0)),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.05),
    transforms.GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
# PathImageFolder reads `.samples` from the wrapped dataset, so it needs a real
# ImageFolder (one sub-folder per identity) rather than an empty list. The
# archive is extracted to /content/data and contains a top-level
# "post-processed" directory.
raw_dataset = datasets.ImageFolder("/content/data/post-processed")
dataset = PathImageFolder(raw_dataset, transform=transform)
# ImageFolder lists samples class by class; shuffling mixes identities within
# each batch instead of feeding them in folder order every epoch.
dataloader = DataLoader(dataset, batch_size=128, shuffle=True, num_workers=4)

## Criação do modelo

In [None]:
class ResidualBottleneckBlock(nn.Module):
    """Bottleneck residual unit: 1x1 conv, 3x3 conv, 1x1 conv, plus a skip path.

    The main path ends with ``planes * expansion`` channels. The skip path is
    the input itself, or ``downsample(input)`` when a ``downsample`` module is
    supplied.

    Args:
        inplanes: Number of input channels.
        planes: Channel width of the two inner convolutions.
        stride: Stride of the 3x3 convolution.
        downsample: Optional module applied to the input before it is added
            to the main path.
    """

    expansion = 2

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        widened = planes * self.expansion

        # Stage 1: 1x1 convolution, inplanes -> planes.
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        # Stage 2: 3x3 convolution carrying the block's stride.
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes)
        # Stage 3: 1x1 convolution, planes -> planes * expansion.
        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(widened)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Run the three conv stages, add the skip path, and apply ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Skip path: raw input unless a projection module was provided.
        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)

In [None]:
import torch.nn.functional as F

class FaceNetResNet(nn.Module):
    """ResNet-style backbone that maps images to L2-normalized embeddings.

    Args:
        block: Residual block class. It must expose an ``expansion`` class
            attribute and accept ``(inplanes, planes, stride, downsample)``.
        layers: Number of residual blocks in each of the four stages.
        embedding_size: Dimensionality of the output embedding.
    """

    def __init__(self, block=ResidualBottleneckBlock, layers=(2, 2, 2, 2), embedding_size=512):
        super().__init__()
        # Channel count fed to the next stage; updated by _make_layer.
        self.inplanes = 64
        # Stem: 7x7 strided convolution on 3-channel input, then max pooling.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True)
        )
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Four residual stages; stages 2-4 use stride 2.
        self.layer1 = self._make_layer(block, 64,  layers[0], stride=1)
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        # Global pooling followed by the embedding projection.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # Size the projection from the block actually in use, so a custom
        # `block` with a different expansion still gets a matching input width.
        self.fc = nn.Linear(512 * block.expansion, embedding_size)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks and update ``self.inplanes``.

        Only the first block receives ``stride`` and, when the stride or the
        channel count changes, a 1x1 conv + batch-norm projection for its
        skip path.
        """
        out_channels = planes * block.expansion
        downsample = None
        if stride != 1 or self.inplanes != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        stage = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = out_channels
        for _ in range(1, blocks):
            stage.append(block(self.inplanes, planes))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return one unit-length (L2 norm along dim 1) embedding per input image."""
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return F.normalize(x, p=2, dim=1)

## Loop de treinamento

In [None]:
# Train the embedding network with a triplet margin loss on mined pairs.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

model = FaceNetResNet().to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-4)
loss_fn = losses.TripletMarginLoss(margin=0.8)
miner = miners.MultiSimilarityMiner(epsilon=0.1)
# Halve the learning rate when the epoch-average loss stops decreasing.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)
epochs = 250

for epoch in range(epochs):
    # The export cell switches the model to eval(); re-assert training mode so
    # re-running this cell afterwards still updates batch-norm statistics.
    model.train()
    skipped = 0
    total_loss = 0.0
    batch_count = 0

    for batch_idx, batch in enumerate(dataloader):
        imgs, paths, labels = batch
        imgs = imgs.to(device)
        labels = labels.to(device)
        embeddings = model(imgs)
        hard_triplets = miner(embeddings, labels)

        # The miner returns a tuple of index tensors, so len() of the tuple is
        # never 0. Inspect the tensors themselves: if any of them is empty,
        # nothing usable was mined for this batch.
        if any(indices.numel() == 0 for indices in hard_triplets):
            skipped += 1
            continue

        loss = loss_fn(embeddings, labels, hard_triplets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        total_loss += batch_loss
        batch_count += 1

        if batch_idx % 50 == 0:
            print(f"Batch: {batch_idx}, Loss: {batch_loss}")

    if batch_count > 0:
        avg_loss = total_loss / batch_count
        scheduler.step(avg_loss)
    else:
        # No batch contributed a loss; do not feed a fake 0 to the scheduler,
        # which would record it as the best value seen so far.
        avg_loss = 0
    print(f"Epoch {epoch+1} complete. Average Loss: {avg_loss:.4f}. Skipped batches: {skipped}/{len(dataloader)}")

Using device: cuda
Batch: 0, Loss: 0.8203836679458618
Batch: 50, Loss: 0.8307543992996216
Epoch 1 complete. Average Loss: 0.8027. Skipped batches: 0/94
Batch: 0, Loss: 0.8402153849601746
Batch: 50, Loss: 0.7933616042137146
Epoch 2 complete. Average Loss: 0.8040. Skipped batches: 0/94
Batch: 0, Loss: 0.7919193506240845
Batch: 50, Loss: 0.7674819231033325
Epoch 3 complete. Average Loss: 0.8015. Skipped batches: 0/94
Batch: 0, Loss: 0.8217498064041138
Batch: 50, Loss: 0.7852499485015869
Epoch 4 complete. Average Loss: 0.8023. Skipped batches: 0/94
Batch: 0, Loss: 0.8413223028182983
Batch: 50, Loss: 0.7776561379432678
Epoch 5 complete. Average Loss: 0.8031. Skipped batches: 0/94
Batch: 0, Loss: 0.8121336102485657
Batch: 50, Loss: 0.8041819334030151
Epoch 6 complete. Average Loss: 0.8015. Skipped batches: 0/94
Batch: 0, Loss: 0.8388248085975647
Batch: 50, Loss: 0.8160836696624756
Epoch 7 complete. Average Loss: 0.8028. Skipped batches: 0/94
Batch: 0, Loss: 0.8132558465003967
Batch: 50, Loss

Usa o modelo treinado para gerar os embeddings de todas as imagens e os salva no banco de dados (ChromaDB):

In [None]:
# Embed every image with the trained model and store the vectors in ChromaDB.
all_embeddings = []
all_labels = []
all_paths = []

# Stored embeddings must be reproducible, so use a deterministic pipeline
# (resize + tensor + normalize) instead of the randomly augmented training one.
eval_transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
eval_dataset = PathImageFolder(raw_dataset, transform=eval_transform)
eval_dataloader = DataLoader(eval_dataset, batch_size=128, num_workers=4)

model.eval()

with torch.no_grad():
    for imgs, paths, labels in eval_dataloader:
        imgs = imgs.to(device)
        emb = model(imgs).cpu()
        all_embeddings.extend(emb.tolist())
        all_paths.extend(paths)
        all_labels.extend(labels.tolist())

collection = client.create_collection(
    name="faces",
    embedding_function=None,
    get_or_create=True
)

chromas_batch_size = 5000
num_embeddings = len(all_embeddings)

for start in range(0, num_embeddings, chromas_batch_size):
    stop = start + chromas_batch_size
    batch_embeddings = all_embeddings[start:stop]
    batch_ids = [f"img_{j}" for j in range(start, start + len(batch_embeddings))]
    batch_metadatas = [{"label": label} for label in all_labels[start:stop]]
    batch_documents = all_paths[start:stop]

    # upsert rather than add: the collection is fetched with get_or_create, so
    # on a re-run (e.g. after retraining) the ids already exist and their
    # embeddings must be replaced rather than left untouched.
    collection.upsert(
        ids=batch_ids,
        embeddings=batch_embeddings,
        metadatas=batch_metadatas,
        documents=batch_documents
    )