In [4]:
import cv2
import time
import numpy as np
from collections import Counter
from PIL import Image
import torch
from facenet_pytorch import MTCNN, InceptionResnetV1
import tensorflow as tf
import pickle

In [5]:


# cài đặt Thiết bị đưa lên gpu, ko thì cpu
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load model TensorFlow (facenet classifier)
model = tf.keras.models.load_model("facenet.h5")

# Load MTCNN và FaceNet (pytorch)
mtcnn = MTCNN(image_size=160, margin=20, device=device)
facenet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

# Load nhãn
# Load label encoder
with open("C:/Users/Loc/Desktop/Do_An_Co_So/code/code_final/label_encoder.pkl", "rb") as f:
    encoder = pickle.load(f)
class_names = encoder.classes_  # Lấy nhãn 

# Gán độ tin cậy, dùng để phân biệt đặc trưng người trong tập huấn luyện hay là người lạ
confidence_threshold = 0.6


prediction_results = [] # lưu kq dự đoán

# Mở webcam
cap = cv2.VideoCapture(0)
print("[INFO] Webcam đang mở. Nhận diện trong 20 giây...")
start_time = time.time()
duration = 20 # trong thời gian 20 giây để nhận dạng

while True:
    ret, frame = cap.read()   # lấy khung hình từ webcamm
    if not ret:
        break
    frame = cv2.GaussianBlur(frame, (5, 5), 0)
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # chuyển ảnh khung hình sang định dạng phù hợp (RGB)
    img_pil = Image.fromarray(rgb_frame)

    boxes, probs = mtcnn.detect(img_pil) # Từ khung hình, phát hiện khuôn mặt
    if boxes is not None:
        for box in boxes:

            # Cắt khuôn mặt ra khỏi khung hình gốc bằng các tọa độ x1,x2,x3,x4
            x1, y1, x2, y2 = [int(max(0, b)) for b in box]  # 
            x2 = min(x2, frame.shape[1])
            y2 = min(y2, frame.shape[0])
            face = frame[y1:y2, x1:x2]
            if face.size == 0:
                continue
            try:
                face_pil = Image.fromarray(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))
                face_tensor = mtcnn(face_pil)
                if face_tensor is None:
                    continue
                    
                # Trích xuất đặc trưng ảnh của khung hình webcam
                face_tensor = face_tensor.unsqueeze(0).to(device)
                with torch.no_grad():
                    embedding = facenet(face_tensor).cpu().numpy()

                # Dự đoán với TensorFlow model
                pred = model.predict(embedding)
                pred_index = np.argmax(pred)
                confidence = pred[0][pred_index]

                # So sánh độ tin cậy, nếu treen 0.6 thì độ tin cậy cao và nhận dạng thành công
                # nếu thấp hơn thì gán là unknown
                if confidence >= confidence_threshold:
                    label = class_names[pred_index]
                else:
                    label = "Unknown"
                #  Lưu kq vào nhãn
                prediction_results.append(label)

                # Vẽ khung hình nhận dạng người lạ ( màu đỏ)
                color = (0, 255, 0) if label != "Unknown" else (0, 0, 255)
                text = f"{label} ({confidence*100:.2f}%)" if label != "Unknown" else "Unknown"

                # vẽ khung hình nhận dạng chính xác( màu xanh )
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                cv2.putText(frame, text, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

            except Exception as e:
                print("Lỗi xử lý khuôn mặt:", e)
                continue

    cv2.imshow("Face Recognition (FaceNet)", frame)

    # giới hạn thời gian( 20 giây tự động ngắt)
    if time.time() - start_time > duration:
        break
    # kết thúc, ngưng webcam (quit)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

# In ra kết quả 
if prediction_results:
    most_common_label, count = Counter(prediction_results).most_common(1)[0]
    print(f"\n-----------------------Kết quả nhận diện-------------------\n ---------------------------{most_common_label}---------------------- \n Nhân vật {most_common_label} xuất hiện {count} lần trong {len(prediction_results)} lần nhận dạng")
else:
    print("\nKhông nhận dạng được khuôn mặt nào, yêu cầu vào khung hình.")




[INFO] Webcam đang mở. Nhận diện trong 20 giây...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 292ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m 

: 