In [None]:
# Builds the Swin-based emotion classifier, loads its fine-tuned weights, and defines `inference()` for single-image prediction.

import timm
import torch
import torch.nn as nn
import torchvision.transforms as T
import cv2

# --------------------
# CONFIG
# --------------------


# Side length (pixels) that inputs are resized to; matches the 224x224
# resolution in the backbone's model name below.
IMG_SIZE = 224

# Label for each classifier output index (used as CLASS_NAMES[pred_idx]).
# NOTE(review): the order presumably matches the label encoding used at
# training time — confirm against the training notebook.
CLASS_NAMES = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']
# Derived from the label list so the head size and the labels cannot drift apart.
NUM_CLASSES = len(CLASS_NAMES)

# Placeholder — replace with the real location of the fine-tuned checkpoint
# before running this cell.
WEIGHTS_PATH = r'..path to \step2_swin.pth'

# Backbone without a classifier (num_classes=0), so it outputs feature vectors
# of size base_model.num_features.
# pretrained=False: every weight is overwritten by the checkpoint loaded via
# load_state_dict (strict) further down, so downloading the ImageNet weights
# from the Hugging Face Hub would only cost time and bandwidth.
base_model = timm.create_model(
    'swin_tiny_patch4_window7_224',
    pretrained=False,
    num_classes=0
)

# Small MLP classification head on top of the backbone features.
head = nn.Sequential(
    nn.Linear(base_model.num_features, 128),
    nn.ReLU(),
    nn.Linear(128, NUM_CLASSES),
)

class SwinWithHead(nn.Module):
    """Feature extractor followed by a classification head.

    The sub-modules are stored as ``backbone`` and ``head``; those attribute
    names determine the key prefixes in the module's state dict.
    """

    def __init__(self, backbone, head):
        """Store the two sub-modules.

        Args:
            backbone: Module mapping an input batch to feature tensors.
            head: Module mapping those features to the final output.
        """
        super().__init__()
        self.backbone = backbone
        self.head = head

    def forward(self, x):
        """Return ``head(backbone(x))``."""
        return self.head(self.backbone(x))

# Combine the backbone and the classification head into one module.
model = SwinWithHead(base_model, head)

# Read the checkpoint onto the CPU and copy its tensors into the model.
# load_state_dict is strict by default, so the checkpoint keys must match the
# `backbone.*` / `head.*` layout defined by SwinWithHead.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a
# trusted source; if the file holds a plain state dict, weights_only=True
# would be the safer call — confirm.
state = torch.load(WEIGHTS_PATH, map_location='cpu')
model.load_state_dict(state)

# Put the model in evaluation mode for inference.
model.eval()

# preprocessing
# ToTensor turns an HxWxC uint8 image into a CxHxW float tensor scaled to [0, 1];
# Resize then brings it to IMG_SIZE x IMG_SIZE.
# NOTE(review): no mean/std normalization is applied here — confirm this matches
# the preprocessing used during training.
transform = T.Compose([
    T.ToTensor(),
    T.Resize((IMG_SIZE, IMG_SIZE))
])

def inference(face_crop, img_path: str | None = None):
    if img_path is not None:
      # read image from disk (BGR)
      face_crop = cv2.imread(img_path)
      if face_crop is None:
          raise ValueError(f"Could not read image from path: {img_path}")

    # BGR → RGB
    img = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)

    # preprocess
    x = transform(img).unsqueeze(0)   # shape: (1,3,224,224)

    with torch.no_grad():
        logits = model(x)
        probs = torch.softmax(logits, dim=1)
        pred_idx = torch.argmax(probs, dim=1).item()
        pred_proba = probs[0, pred_idx].item()

    pred_class = CLASS_NAMES[pred_idx]
    return pred_class, pred_proba

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/114M [00:00<?, ?B/s]

FileNotFoundError: [Errno 2] No such file or directory: '..path to \\step2_swin.pth'

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read from this notebook.
# NOTE(review): if WEIGHTS_PATH is meant to point into Drive, this cell has to run
# before the model-loading cell — confirm the intended cell order.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# live cam emotion detection

import cv2
import mediapipe as mp
from IPython.display import display, clear_output
from PIL import Image


mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils


def _clip_box(x, y, w, h, frame_w, frame_h):
    """Intersect a pixel box with the frame; returns (x, y, w, h), w/h <= 0 if no overlap."""
    left = max(0, x)
    top = max(0, y)
    right = min(frame_w, x + w)
    bottom = min(frame_h, y + h)
    return left, top, right - left, bottom - top


cap = cv2.VideoCapture(0)

try:
    # Fail loudly instead of silently showing nothing when no camera is available.
    if not cap.isOpened():
        raise RuntimeError("Could not open webcam (device 0)")

    with mp_face_detection.FaceDetection(model_selection=0, min_detection_confidence=0.5) as face_detection:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # MediaPipe requires RGB
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = face_detection.process(frame_rgb)

            frame_h, frame_w, _ = frame.shape
            detected = False

            if results.detections:
                detected = True
                for detection in results.detections:
                    # 1. Get Bounding Box (relative coordinates -> pixels)
                    bboxC = detection.location_data.relative_bounding_box
                    x = int(bboxC.xmin * frame_w)
                    y = int(bboxC.ymin * frame_h)
                    w = int(bboxC.width * frame_w)
                    h = int(bboxC.height * frame_h)

                    # 2. Clip the box to the frame. Both edges are clipped, so a box
                    #    starting off-screen shrinks instead of shifting onto
                    #    pixels outside the detected face.
                    x, y, w, h = _clip_box(x, y, w, h, frame_w, frame_h)

                    # 3. Crop and Inference
                    if w > 0 and h > 0:
                        face_crop = frame[y:y+h, x:x+w]
                        emotion, proba = inference(face_crop)

                        # 4. Display (keep the label inside the frame for faces near the top)
                        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                        cv2.putText(frame, f"{emotion}, {round(proba *100,2)}%", (x, max(y - 10, 20)),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

            if not detected:
                cv2.putText(frame, "No Face Detected", (50, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Replace the previous frame in the cell output with the new one.
            clear_output(wait=True)
            display(Image.fromarray(rgb))

            # ESC stops the loop when an OpenCV window has keyboard focus.
            # NOTE(review): frames are shown inline rather than via cv2.imshow, so
            # this key check likely never fires; interrupting the kernel is the
            # practical way to stop — confirm.
            if cv2.waitKey(1) & 0xFF == 27:
                break
except KeyboardInterrupt:
    # Kernel interrupt is treated as a normal stop request.
    pass
finally:
    # Always free the camera, even on interrupt or an inference error.
    cap.release()
    cv2.destroyAllWindows()
