In [2]:
!pip install "pillow<10.0.0"
!pip install --upgrade facenet-pytorch

Collecting facenet-pytorch
  Using cached facenet_pytorch-2.6.0-py3-none-any.whl.metadata (12 kB)
Collecting numpy<2.0.0,>=1.24.0 (from facenet-pytorch)
  Using cached numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Collecting Pillow<10.3.0,>=10.2.0 (from facenet-pytorch)
  Downloading pillow-10.2.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting torch<2.3.0,>=2.2.0 (from facenet-pytorch)
  Downloading torch-2.2.2-cp311-cp311-manylinux1_x86_64.whl.metadata (25 kB)
Collecting torchvision<0.18.0,>=0.17.0 (from facenet-pytorch)
  Downloading torchvision-0.17.2-cp311-cp311-manylinux1_x86_64.whl.metadata (6.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch<2.3.0,>=2.2.0->facenet-pytorch)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch<2.3.0,>=2.2.0->facenet-pytorch)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3

In [3]:
import torch
import torch.nn.functional as F
from torchvision import transforms
from facenet_pytorch import MTCNN
from PIL import Image
import json
import torch.nn as nn
# ---------- MODEL ARCHITECTURE ----------
class EmotionCNN(nn.Module):
    """Small convolutional classifier for face-crop emotion recognition.

    ``features`` is four ``Conv2d -> BatchNorm2d -> ReLU`` stages with 32, 64,
    128 and 256 output channels. The first three stages are each followed by
    2x2 max pooling; the last is followed by adaptive average pooling to 1x1.
    ``classifier`` flattens that 256-value vector, applies dropout and maps it
    to ``num_classes`` logits.

    The position of every layer inside the two ``nn.Sequential`` containers is
    kept fixed, so ``state_dict`` keys (``features.0.weight`` ...
    ``classifier.2.bias``) stay compatible with checkpoints saved from the
    default configuration.

    Args:
        num_classes: Number of output logits. Defaults to 7 (FER2013 has 7
            emotion classes).
        in_channels: Number of channels of the input image. Defaults to 1.
        dropout: Dropout probability applied before the final linear layer.
    """

    def __init__(self, num_classes: int = 7, in_channels: int = 1, dropout: float = 0.5):
        super().__init__()
        self.features = nn.Sequential(
            *self._conv_block(in_channels, 32),
            nn.MaxPool2d(2),

            *self._conv_block(32, 64),
            nn.MaxPool2d(2),

            *self._conv_block(64, 128),
            nn.MaxPool2d(2),

            *self._conv_block(128, 256),
            nn.AdaptiveAvgPool2d((1, 1)),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(dropout),
            nn.Linear(256, num_classes),
        )

    @staticmethod
    def _conv_block(in_ch: int, out_ch: int) -> list:
        """Return the layers of one 3x3 conv stage: Conv2d, BatchNorm2d, ReLU."""
        return [
            nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
        ]

    def forward(self, x):
        """Return raw (pre-softmax) class logits for a batch of images ``x``."""
        x = self.features(x)
        x = self.classifier(x)
        return x

# ---------- CONFIGURATION ----------
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Emotion labels, indexed by the class id the model predicts.
# NOTE(review): the order must match the label encoding used at training
# time — confirm against the training code.
class_names = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']

# Load the trained weights and switch to inference mode.
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code when it is loaded.
model = EmotionCNN().to(device)
model.load_state_dict(
    torch.load("best_model.pt", map_location=device, weights_only=True)
)
model.eval()

# MTCNN face detector; keep_all=True so every detected face is kept.
mtcnn = MTCNN(keep_all=True, device=device)

# Same preprocessing as during training: grayscale, 48x48, tensor,
# then normalization with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize((48, 48)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# ---------- PREDICTION ----------
def detect_emotions(image_path):
    """Detect faces in an image and classify the emotion of each one.

    Face boxes come from the module-level ``mtcnn`` detector. Each box is
    clamped to the image, cropped, preprocessed with ``transform`` and
    classified by ``model``.

    Args:
        image_path: Anything accepted by ``PIL.Image.open``.

    Returns:
        A list with one dict per usable detection::

            {"bbox": [x, y, width, height],   # ints, top-left corner + size
             "emotion": <entry of class_names>,
             "confidence": <softmax probability rounded to 2 decimals>}

        The list is empty when no face is detected. Boxes that have no area
        left after clamping to the image are skipped.
    """
    # The context manager closes the file as soon as the RGB copy exists.
    with Image.open(image_path) as source:
        img = source.convert("RGB")
    img_width, img_height = img.size

    boxes, _ = mtcnn.detect(img)
    if boxes is None:
        return []

    results = []
    for box in boxes:
        x1, y1, x2, y2 = map(int, box)

        # Detector boxes may extend past the image border. Clamp them so the
        # crop contains only real pixels and the reported bbox is valid.
        x1, y1 = max(x1, 0), max(y1, 0)
        x2, y2 = min(x2, img_width), min(y2, img_height)
        width, height = x2 - x1, y2 - y1
        if width <= 0 or height <= 0:
            # Nothing left to crop; an empty crop cannot be resized.
            continue

        # Crop the face and add a batch dimension for the model.
        face = img.crop((x1, y1, x2, y2))
        face_tensor = transform(face).unsqueeze(0).to(device)

        # Predict: softmax over the logits, keep the top class.
        with torch.no_grad():
            output = model(face_tensor)
            prob = F.softmax(output, dim=1)
            confidence, pred = torch.max(prob, 1)

        results.append({
            "bbox": [x1, y1, width, height],
            "emotion": class_names[pred.item()],
            "confidence": round(confidence.item(), 2)
        })
    return results

# Run detection on the sample image and pretty-print the result as JSON
# (ensure_ascii=False leaves non-ASCII characters unescaped).
results = detect_emotions("/content/DSC_4903.JPG")
report = json.dumps(results, ensure_ascii=False, indent=2)
print(report)

[
  {
    "bbox": [
      137,
      100,
      174,
      257
    ],
    "emotion": "happy",
    "confidence": 0.54
  }
]
