In [1]:
import cv2
from PIL import Image
import torch
from ultralytics import YOLO
from utils.system import get_available_device
from facenet_pytorch import InceptionResnetV1
from collections import defaultdict
import torch.nn.functional as F
import numpy as np
import faiss
import os


device = get_available_device()

KNOWN_FACES_PATH='./known_faces'

# Model
detector_model = YOLO("./best_models/yolo_decoder_best.pt").to(device)
classifier_model = InceptionResnetV1(pretrained='vggface2', num_classes=2, device=device).eval()

  from .autonotebook import tqdm as notebook_tqdm


GPU is not available, using CPU instead
Using device: cpu


In [2]:
known_embeddings = list()
class_embedding_names = list()

index = faiss.IndexFlatL2(512)

In [3]:
from torchvision.transforms import v2

transform = v2.Compose([
    v2.ToImage(),
    v2.ToDtype(torch.uint8, scale=True),
    v2.Resize((160, 160)),  # Измените размер под вашу модель
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])


def compare_embeddings(embedding, threshold=0.7):
    # 3. Поиск ближайшего embedding:
    embedding_np = embedding.detach().numpy()
    embedding_np = embedding_np.reshape(1, -1).astype('float32')  # Убедитесь, что форма и тип данных верны

    distances, indices = index.search(embedding_np, k=1)
    if distances > threshold:
        return None
    
    # 4. Получение имени объекта по индексу:
    closest_object_name = class_embedding_names[indices[0][0]]  # indices[0][0] - индекс ближайшего embedding

    return closest_object_name
    

def detect_faces(image):
    with torch.no_grad():  # Отключите вычисление градиентов во время инференса
        result = detector_model([image])[0]
    
    return result.boxes.xyxy
    
        
def recognize_faces(image, boxes):
    cropped_faces = [
        transform(image.crop(list(map(float,bbox))))
        for bbox in boxes
    ]
    if cropped_faces:  # Убедитесь, что список не пустой
        face_tensors = torch.stack(cropped_faces)
        embeddings = classifier_model(face_tensors)
    else:
        embeddings = torch.empty(0)
        
    return embeddings

In [4]:
from utils.system import get_last_dirname


for path_dir, dir_list, file_list in os.walk(KNOWN_FACES_PATH):
    for file_path in file_list:
        image_path = os.path.join(path_dir, file_path)
        image = Image.open(image_path)
        
        boxes = detect_faces(image)
        embeddings = recognize_faces(image, boxes)
        
        name = get_last_dirname(path_dir)
        known_embeddings.append(embeddings[0])
        class_embedding_names.append(name)
        
known_embeddings_detached = [tensor.detach().numpy() for tensor in known_embeddings] # отсоединяем каждый Tensor
known_embeddings = np.array(known_embeddings_detached).astype('float32')
index.add(known_embeddings)


0: 448x640 1 face, 64.2ms
Speed: 3.1ms preprocess, 64.2ms inference, 0.9ms postprocess per image at shape (1, 3, 448, 640)

0: 448x640 1 face, 52.9ms
Speed: 2.4ms preprocess, 52.9ms inference, 0.8ms postprocess per image at shape (1, 3, 448, 640)

0: 448x640 1 face, 52.7ms
Speed: 2.7ms preprocess, 52.7ms inference, 0.8ms postprocess per image at shape (1, 3, 448, 640)

0: 448x640 1 face, 60.3ms
Speed: 3.2ms preprocess, 60.3ms inference, 0.9ms postprocess per image at shape (1, 3, 448, 640)

0: 448x640 1 face, 63.7ms
Speed: 3.1ms preprocess, 63.7ms inference, 0.8ms postprocess per image at shape (1, 3, 448, 640)

0: 448x640 1 face, 61.9ms
Speed: 3.5ms preprocess, 61.9ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)

0: 448x640 1 face, 54.7ms
Speed: 2.6ms preprocess, 54.7ms inference, 0.7ms postprocess per image at shape (1, 3, 448, 640)

0: 448x640 1 face, 73.5ms
Speed: 4.8ms preprocess, 73.5ms inference, 1.0ms postprocess per image at shape (1, 3, 448, 640)


In [None]:
import cv2
import time


# 2.  Захват видео с веб-камеры
cap = cv2.VideoCapture(0)  # 0 - индекс веб-камеры по умолчанию. Измените, если у вас несколько камер.


# Проверка, успешно ли открыта веб-камера
if not cap.isOpened():
    raise IOError("Не удается открыть веб-камеру")


frame_count = 0
start_time = time.time()
fps = 0

# 3. Основной цикл обработки кадров
while(True):
    # Считываем кадр с веб-камеры
    ret, frame = cap.read()

    # Если кадр не был успешно считан, выходим из цикла
    if not ret:
        break
    
    frame_count += 1
    elapsed_time = time.time() - start_time
    
    
    
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    pil_image = Image.fromarray(frame_rgb)
    
    boxes = detect_faces(pil_image)
    embeddings = recognize_faces(pil_image, boxes)
    
    for bbox, embedding in zip(boxes, embeddings):
        x1 = int(bbox[0])
        y1 = int(bbox[1])
        x2 = int(bbox[2])
        y2 = int(bbox[3])
        
        name = compare_embeddings(embedding) or 'Unknown'

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(frame, f"{name}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    

    if elapsed_time > 1:  # Обновляем FPS каждую секунду
        fps = frame_count / elapsed_time
        start_time = time.time()
        frame_count = 0

    # Добавим FPS на кадр для отображения на экране
    cv2.putText(frame, f"FPS: {fps:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

    # 7. Отображение результата
    cv2.imshow('Обнаружение лиц', frame)

    # 8. Выход из цикла при нажатии клавиши 'q'
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
    

# 9. Освобождение ресурсов
cap.release()
cv2.destroyAllWindows()




0: 384x640 1 face, 56.6ms
Speed: 4.3ms preprocess, 56.6ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 face, 59.4ms
Speed: 2.7ms preprocess, 59.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 face, 86.2ms
Speed: 2.8ms preprocess, 86.2ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 face, 52.9ms
Speed: 2.0ms preprocess, 52.9ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 face, 50.2ms
Speed: 1.8ms preprocess, 50.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 face, 48.5ms
Speed: 1.9ms preprocess, 48.5ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 face, 48.7ms
Speed: 1.7ms preprocess, 48.7ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 face, 48.8ms
Speed: 1.7ms preprocess, 48.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x

KeyboardInterrupt: 

: 