In [None]:
import cv2
import dlib
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from PIL import Image
from collections import Counter
import requests

# ========== Telegram Setup ==========
# SECURITY: a bot token is hard-coded below. Anything committed to source
# should be treated as leaked: revoke it with @BotFather and provide the
# replacement through the environment instead of editing this file.
import os

# Environment variables take precedence; the literals remain only as a
# backward-compatible fallback so existing runs behave exactly as before.
BOT_TOKEN = os.environ.get('TELEGRAM_BOT_TOKEN', '8111117866:AAGZI-zZLwbaGVqAIfzKlXclJobiMPtcZe0')
CHAT_ID = os.environ.get('TELEGRAM_CHAT_ID', '6855415361')
# Flipped to True after the first alert so at most one message is sent per run.
message_sent = False

def send_telegram_message(message, bot_token, chat_id, timeout=10):
    """Send a plain-text message through the Telegram Bot API (best effort).

    Args:
        message: Text to deliver.
        bot_token: Bot API token; interpolated into the request URL.
        chat_id: Identifier of the destination chat.
        timeout: Seconds to wait for Telegram before giving up. Without a
            timeout ``requests`` may block indefinitely, which would freeze
            the caller on a dead connection.

    Returns:
        None. Failures are printed rather than raised.
    """
    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
    payload = {
        'chat_id': chat_id,
        'text': message
    }
    try:
        response = requests.post(url, data=payload, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to send message: {response.text}")
    except Exception as e:
        # Deliberately broad: a failed notification must not take down the caller.
        print(f"Telegram error: {e}")

# ========== Model Definitions ==========
class ArcMarginProduct(nn.Module):
    """Additive angular margin (ArcFace-style) classification head.

    The forward pass computes the cosine between the L2-normalised input and
    each L2-normalised class weight row, adds the margin ``m`` to the angle of
    the labelled class only, and scales every logit by ``s``.

    Args:
        in_features: Width of the incoming embedding.
        out_features: Number of classes (rows of ``weight``).
        s: Scale applied to all output logits.
        m: Angular margin in radians added to the target-class angle.
        easy_margin: If True, apply the margin only where ``cosine > 0`` and
            fall back to the plain cosine elsewhere.
    """

    def __init__(self, in_features, out_features, s=30.0, m=0.50, easy_margin=False):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m

        # The only learnable parameter; its name is kept so checkpoints load.
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        self.cos_m = torch.cos(torch.tensor(m))
        self.sin_m = torch.sin(torch.tensor(m))
        # th = cos(pi - m): below this cosine, theta + m would exceed pi.
        self.th = torch.cos(torch.tensor(torch.pi) - m)
        # Penalty subtracted from the cosine in that region instead.
        self.mm = torch.sin(torch.tensor(torch.pi) - m) * m

    def forward(self, input, label):
        """Return scaled logits with the margin applied at the ``label`` columns.

        Args:
            input: Batch of embeddings, one row per sample.
            label: Class index per sample; reshaped to a column and cast to long.

        Returns:
            Tensor with one row per sample and one column per class.
        """
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # Rounding can push |cosine| a hair above 1, making 1 - cos^2 negative
        # and the square root NaN; clamp the radicand into [0, 1] first.
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0.0, 1.0))
        # cos(theta + m) = cos(theta) cos(m) - sin(theta) sin(m)
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)

        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)

        # Margin logit for the target class, plain cosine for all others.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output

class CrossAttention(nn.Module):
    """Attention between two feature maps.

    Queries come from the first map, keys and values from the second. Each
    4-D map is flattened so that every spatial position becomes one token of
    width ``dim``. ``num_heads`` only influences the softmax scale; the
    channels are not split into separate heads.
    """

    def __init__(self, dim, num_heads=4):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = head_dim ** -0.5
        # One shared projection yields query, key and value side by side.
        self.qkv = nn.Linear(dim, dim * 3, bias=False)
        self.proj = nn.Linear(dim, dim)

    def forward(self, x1, x2):
        """Attend from ``x1`` (queries) to ``x2`` (keys/values); output has ``x1``'s shape."""
        batch, channels, height, width = x1.shape
        query_tokens = x1.flatten(2).transpose(1, 2)
        context_tokens = x2.flatten(2).transpose(1, 2)

        query, _, _ = self.qkv(query_tokens).chunk(3, dim=-1)
        _, key, value = self.qkv(context_tokens).chunk(3, dim=-1)

        scores = torch.matmul(query, key.transpose(-2, -1)) * self.scale
        weights = torch.softmax(scores, dim=-1)
        mixed = self.proj(torch.matmul(weights, value))
        # Back from (batch, tokens, channels) to the original map layout.
        return mixed.permute(0, 2, 1).reshape(batch, channels, height, width)

class ChannelAttention(nn.Module):
    """Gate a feature map with the sigmoid of its own self-attention output."""

    def __init__(self, in_channels):
        super().__init__()
        self.cross_attention = CrossAttention(in_channels)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` scaled element-wise by a gate in (0, 1)."""
        # Self-attention: the map serves as both query and key/value source.
        gate = self.sigmoid(self.cross_attention(x, x))
        return gate * x

class SpatialAttention(nn.Module):
    """Gate a feature map with a single-channel mask derived from self-attention."""

    def __init__(self, in_channels):
        super().__init__()
        self.cross_attention = CrossAttention(in_channels)
        # 7x7 convolution (padding 3 keeps H and W) collapsing all channels to one.
        self.conv = nn.Conv2d(in_channels, 1, kernel_size=7, padding=3)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` multiplied by a one-channel sigmoid mask (broadcast over channels)."""
        attended = self.cross_attention(x, x)
        mask = self.sigmoid(self.conv(attended))
        return mask * x

class CA_CBAM(nn.Module):
    """Apply channel attention and then spatial attention to a feature map."""

    def __init__(self, in_channels):
        super().__init__()
        self.channel_att = ChannelAttention(in_channels)
        self.spatial_att = SpatialAttention(in_channels)

    def forward(self, x):
        """Return the input after both attention gates, channel gate first."""
        return self.spatial_att(self.channel_att(x))

class FaceRecognitionModel(nn.Module):
    """Small CNN producing a 128-d face embedding, with a margin head for training.

    Three conv/batch-norm/ReLU/max-pool/dropout stages are followed by a
    CA_CBAM attention block and global average pooling; a linear layer then
    maps the pooled features to the embedding.
    """

    def __init__(self, num_classes):
        super().__init__()
        # (in_channels, out_channels, dropout probability) for each conv stage.
        stages = ((3, 32, 0.3), (32, 64, 0.3), (64, 128, 0.4))
        layers = []
        for c_in, c_out, drop_p in stages:
            layers.extend([
                nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2),
                nn.Dropout(drop_p),
            ])
        layers.append(CA_CBAM(128))
        layers.append(nn.AdaptiveAvgPool2d(1))
        # Flat Sequential: layer indices (and so state_dict keys) stay 0..16.
        self.feature_extractor = nn.Sequential(*layers)
        self.embedding = nn.Linear(128, 128)
        self.arc_margin = ArcMarginProduct(128, num_classes)

    def forward(self, x, label=None):
        """Return the embedding, or margin logits when ``label`` is given."""
        features = self.feature_extractor(x)
        embedding = self.embedding(features.view(features.size(0), -1))
        if label is None:
            return embedding
        return self.arc_margin(embedding, label)

# ========== Load Model ==========
# Number of identities the margin head was built for; class_names maps each
# predicted index to the label drawn on screen.
num_classes = 4
class_names = ['Class 0', 'Class 1', 'Class 2', 'Class 3']

model = FaceRecognitionModel(num_classes)
# Checkpoint is read from a fixed absolute path and mapped onto the CPU.
# NOTE(review): torch.load unpickles the file; consider weights_only=True if
# the checkpoint holds only tensors and plain containers - confirm first.
checkpoint = torch.load(r"D:\Projects\Sentinel turret rover\model\best_model_checkpoint.pth", map_location=torch.device('cpu'))
model.load_state_dict(checkpoint['model_state_dict'])
# Inference mode: dropout is disabled and batch norm uses its running statistics.
model.eval()

# ========== Transformations ==========
# Resize to 128x128, convert to a [0, 1] tensor, then shift/scale each
# channel with mean 0.5 and std 0.5 (i.e. into [-1, 1]).
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
])

# ========== Face Detection ==========
detector = dlib.get_frontal_face_detector()
# Capture device index 0.
# NOTE(review): cap.isOpened() is never checked; a missing camera only shows
# up as the first failed read in the loop below.
cap = cv2.VideoCapture(0)

def classify_as_unknown(predictions, threshold):
    """Report whether the predictions are too inconsistent to trust.

    Prints the share of ``predictions`` held by the most frequent class and
    returns True when that share is strictly below ``threshold``.
    """
    _, top_count = Counter(predictions).most_common(1)[0]
    agreement = top_count / len(predictions)
    print(f"Prediction Confidence: {agreement:.2f}")
    return agreement < threshold

# Number of predictions gathered before an intruder/ally decision is made.
DECISION_WINDOW = 25
predictions = []

# The class weights do not change during inference, so normalise them once
# rather than once per detected face.
with torch.no_grad():
    normalized_weights = F.normalize(model.arc_margin.weight)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = detector(gray)
        frame_h, frame_w = frame.shape[:2]

        for face in faces:
            # Detector boxes can extend past the frame; a negative start index
            # would wrap the slice around, so clip the box to the image first.
            x = max(face.left(), 0)
            y = max(face.top(), 0)
            x2 = min(face.left() + face.width(), frame_w)
            y2 = min(face.top() + face.height(), frame_h)
            if x2 <= x or y2 <= y:
                continue
            face_img = frame[y:y2, x:x2]

            try:
                face_pil = Image.fromarray(cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB))
                face_tensor = transform(face_pil).unsqueeze(0)

                with torch.no_grad():
                    embedding = model(face_tensor)
                    # Cosine similarity of the embedding to every class weight row.
                    logits = F.linear(F.normalize(embedding), normalized_weights)
                    pred_class = torch.argmax(logits, dim=1).item()

                label = class_names[pred_class]
                predictions.append(pred_class)

                cv2.rectangle(frame, (x, y), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f"{label}", (x, y-10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

            except Exception as e:
                # Best effort per face: one bad crop must not stop the stream.
                print(f"Error processing face: {e}")

        # ">=" rather than "==": several faces in one frame can push the count
        # from below the window straight past it, which "==" would never catch.
        if len(predictions) >= DECISION_WINDOW:
            is_unknown = classify_as_unknown(predictions[:DECISION_WINDOW], threshold=0.9)
            if not message_sent:
                text = "🚨 Intruder detected!" if is_unknown else "🟢 Ally detected!"
                send_telegram_message(text, BOT_TOKEN, CHAT_ID)
                message_sent = True
            predictions = []  # Reset after decision

        cv2.imshow("Face Classification", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()

  checkpoint = torch.load(r"D:\Projects\Sentinel turret rover\model\best_model_checkpoint.pth", map_location=torch.device('cpu'))


Prediction Confidence: 0.80
Prediction Confidence: 0.88


In [3]:
import cv2
import dlib
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from PIL import Image
from collections import Counter
import requests

# ========== Telegram Setup ==========
# SECURITY: a bot token is hard-coded below. Anything committed to source
# should be treated as leaked: revoke it with @BotFather and provide the
# replacement through the environment instead of editing this file.
import os

# Environment variables take precedence; the literals remain only as a
# backward-compatible fallback so existing runs behave exactly as before.
BOT_TOKEN = os.environ.get('TELEGRAM_BOT_TOKEN', '8111117866:AAGZI-zZLwbaGVqAIfzKlXclJobiMPtcZe0')  # Replace with your bot token
CHAT_ID = os.environ.get('TELEGRAM_CHAT_ID', '6855415361')  # Replace with your chat ID
# Flipped to True after the first alert so at most one message is sent per run.
message_sent = False

def send_telegram_message(message, bot_token, chat_id, timeout=10):
    """Send a plain-text message through the Telegram Bot API (best effort).

    Args:
        message: Text to deliver.
        bot_token: Bot API token; interpolated into the request URL.
        chat_id: Identifier of the destination chat.
        timeout: Seconds to wait for Telegram before giving up. Without a
            timeout ``requests`` may block indefinitely, which would freeze
            the caller on a dead connection.

    Returns:
        None. Failures are printed rather than raised.
    """
    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
    payload = {'chat_id': chat_id, 'text': message}
    try:
        response = requests.post(url, data=payload, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to send message: {response.text}")
    except Exception as e:
        # Deliberately broad: a failed notification must not take down the caller.
        print(f"Telegram error: {e}")

def send_telegram_photo(image, caption, bot_token, chat_id, timeout=10):
    """Send an image with a caption through the Telegram Bot API (best effort).

    Args:
        image: Image array accepted by ``cv2.imencode``; it is JPEG-encoded in
            memory before upload.
        caption: Text shown with the photo.
        bot_token: Bot API token; interpolated into the request URL.
        chat_id: Identifier of the destination chat.
        timeout: Seconds to wait for Telegram before giving up, so a dead
            connection cannot block the caller indefinitely.

    Returns:
        None. Encoding and network failures are printed rather than raised.
    """
    url = f"https://api.telegram.org/bot{bot_token}/sendPhoto"
    try:
        # Encoding sits inside the try so a bad image is reported like any
        # other failure instead of escaping to the caller.
        encoded_ok, img_encoded = cv2.imencode('.jpg', image)
        if not encoded_ok:
            print("Failed to send photo: JPEG encoding failed")
            return
        files = {'photo': ('face.jpg', img_encoded.tobytes(), 'image/jpeg')}
        data = {'chat_id': chat_id, 'caption': caption}
        response = requests.post(url, files=files, data=data, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to send photo: {response.text}")
    except Exception as e:
        # Deliberately broad: a failed notification must not take down the caller.
        print(f"Telegram photo error: {e}")

# ========== Model Definitions ==========
class ArcMarginProduct(nn.Module):
    """Additive angular margin (ArcFace-style) classification head.

    The forward pass computes the cosine between the L2-normalised input and
    each L2-normalised class weight row, adds the margin ``m`` to the angle of
    the labelled class only, and scales every logit by ``s``.

    Args:
        in_features: Width of the incoming embedding.
        out_features: Number of classes (rows of ``weight``).
        s: Scale applied to all output logits.
        m: Angular margin in radians added to the target-class angle.
        easy_margin: If True, apply the margin only where ``cosine > 0`` and
            fall back to the plain cosine elsewhere.
    """

    def __init__(self, in_features, out_features, s=30.0, m=0.50, easy_margin=False):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m

        # The only learnable parameter; its name is kept so checkpoints load.
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        self.cos_m = torch.cos(torch.tensor(m))
        self.sin_m = torch.sin(torch.tensor(m))
        # th = cos(pi - m): below this cosine, theta + m would exceed pi.
        self.th = torch.cos(torch.tensor(torch.pi) - m)
        # Penalty subtracted from the cosine in that region instead.
        self.mm = torch.sin(torch.tensor(torch.pi) - m) * m

    def forward(self, input, label):
        """Return scaled logits with the margin applied at the ``label`` columns.

        Args:
            input: Batch of embeddings, one row per sample.
            label: Class index per sample; reshaped to a column and cast to long.

        Returns:
            Tensor with one row per sample and one column per class.
        """
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # Rounding can push |cosine| a hair above 1, making 1 - cos^2 negative
        # and the square root NaN; clamp the radicand into [0, 1] first.
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0.0, 1.0))
        # cos(theta + m) = cos(theta) cos(m) - sin(theta) sin(m)
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)

        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)

        # Margin logit for the target class, plain cosine for all others.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output

class CrossAttention(nn.Module):
    """Attention between two feature maps.

    Queries come from the first map, keys and values from the second. Each
    4-D map is flattened so that every spatial position becomes one token of
    width ``dim``. ``num_heads`` only influences the softmax scale; the
    channels are not split into separate heads.
    """

    def __init__(self, dim, num_heads=4):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = head_dim ** -0.5
        # One shared projection yields query, key and value side by side.
        self.qkv = nn.Linear(dim, dim * 3, bias=False)
        self.proj = nn.Linear(dim, dim)

    def forward(self, x1, x2):
        """Attend from ``x1`` (queries) to ``x2`` (keys/values); output has ``x1``'s shape."""
        batch, channels, height, width = x1.shape
        query_tokens = x1.flatten(2).transpose(1, 2)
        context_tokens = x2.flatten(2).transpose(1, 2)

        query, _, _ = self.qkv(query_tokens).chunk(3, dim=-1)
        _, key, value = self.qkv(context_tokens).chunk(3, dim=-1)

        scores = torch.matmul(query, key.transpose(-2, -1)) * self.scale
        weights = torch.softmax(scores, dim=-1)
        mixed = self.proj(torch.matmul(weights, value))
        # Back from (batch, tokens, channels) to the original map layout.
        return mixed.permute(0, 2, 1).reshape(batch, channels, height, width)

class ChannelAttention(nn.Module):
    """Gate a feature map with the sigmoid of its own self-attention output."""

    def __init__(self, in_channels):
        super().__init__()
        self.cross_attention = CrossAttention(in_channels)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` scaled element-wise by a gate in (0, 1)."""
        # Self-attention: the map serves as both query and key/value source.
        gate = self.sigmoid(self.cross_attention(x, x))
        return gate * x

class SpatialAttention(nn.Module):
    """Gate a feature map with a single-channel mask derived from self-attention."""

    def __init__(self, in_channels):
        super().__init__()
        self.cross_attention = CrossAttention(in_channels)
        # 7x7 convolution (padding 3 keeps H and W) collapsing all channels to one.
        self.conv = nn.Conv2d(in_channels, 1, kernel_size=7, padding=3)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` multiplied by a one-channel sigmoid mask (broadcast over channels)."""
        attended = self.cross_attention(x, x)
        mask = self.sigmoid(self.conv(attended))
        return mask * x

class CA_CBAM(nn.Module):
    """Apply channel attention and then spatial attention to a feature map."""

    def __init__(self, in_channels):
        super().__init__()
        self.channel_att = ChannelAttention(in_channels)
        self.spatial_att = SpatialAttention(in_channels)

    def forward(self, x):
        """Return the input after both attention gates, channel gate first."""
        return self.spatial_att(self.channel_att(x))

class FaceRecognitionModel(nn.Module):
    """Small CNN producing a 128-d face embedding, with a margin head for training.

    Three conv/batch-norm/ReLU/max-pool/dropout stages are followed by a
    CA_CBAM attention block and global average pooling; a linear layer then
    maps the pooled features to the embedding.
    """

    def __init__(self, num_classes):
        super().__init__()
        # (in_channels, out_channels, dropout probability) for each conv stage.
        stages = ((3, 32, 0.3), (32, 64, 0.3), (64, 128, 0.4))
        layers = []
        for c_in, c_out, drop_p in stages:
            layers.extend([
                nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2),
                nn.Dropout(drop_p),
            ])
        layers.append(CA_CBAM(128))
        layers.append(nn.AdaptiveAvgPool2d(1))
        # Flat Sequential: layer indices (and so state_dict keys) stay 0..16.
        self.feature_extractor = nn.Sequential(*layers)
        self.embedding = nn.Linear(128, 128)
        self.arc_margin = ArcMarginProduct(128, num_classes)

    def forward(self, x, label=None):
        """Return the embedding, or margin logits when ``label`` is given."""
        features = self.feature_extractor(x)
        embedding = self.embedding(features.view(features.size(0), -1))
        if label is None:
            return embedding
        return self.arc_margin(embedding, label)

# ========== Load Model ==========
# Number of identities the margin head was built for; class_names maps each
# predicted index to the label drawn on screen.
num_classes = 4
class_names = ['Class 0', 'Class 1', 'Class 2', 'Class 3']

model = FaceRecognitionModel(num_classes)
# Checkpoint is read from a fixed absolute path and mapped onto the CPU.
# NOTE(review): torch.load unpickles the file; consider weights_only=True if
# the checkpoint holds only tensors and plain containers - confirm first.
checkpoint = torch.load(r"D:\Projects\Sentinel turret rover\model\best_model_checkpoint.pth", map_location=torch.device('cpu'))
model.load_state_dict(checkpoint['model_state_dict'])
# Inference mode: dropout is disabled and batch norm uses its running statistics.
model.eval()

# ========== Transformations ==========
# Resize to 128x128, convert to a [0, 1] tensor, then shift/scale each
# channel with mean 0.5 and std 0.5 (i.e. into [-1, 1]).
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
])

# ========== Face Detection ==========
detector = dlib.get_frontal_face_detector()
# Capture device index 0.
# NOTE(review): cap.isOpened() is never checked; a missing camera only shows
# up as the first failed read in the loop below.
cap = cv2.VideoCapture(0)

def classify_as_unknown(predictions, threshold):
    """Report whether the predictions are too inconsistent to trust.

    Prints the share of ``predictions`` held by the most frequent class and
    returns True when that share is strictly below ``threshold``.
    """
    _, top_count = Counter(predictions).most_common(1)[0]
    agreement = top_count / len(predictions)
    print(f"Prediction Confidence: {agreement:.2f}")
    return agreement < threshold

# Number of predictions gathered before an intruder/ally decision is made.
DECISION_WINDOW = 25
predictions = []
# Copy of the most recent frame in which a face was classified; attached to alerts.
last_face_image = None

# The class weights do not change during inference, so normalise them once
# rather than once per detected face.
with torch.no_grad():
    normalized_weights = F.normalize(model.arc_margin.weight)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = detector(gray)
        frame_h, frame_w = frame.shape[:2]

        for face in faces:
            # Detector boxes can extend past the frame; a negative start index
            # would wrap the slice around, so clip the box to the image first.
            x = max(face.left(), 0)
            y = max(face.top(), 0)
            x2 = min(face.left() + face.width(), frame_w)
            y2 = min(face.top() + face.height(), frame_h)
            if x2 <= x or y2 <= y:
                continue
            face_img = frame[y:y2, x:x2]

            try:
                face_pil = Image.fromarray(cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB))
                face_tensor = transform(face_pil).unsqueeze(0)

                with torch.no_grad():
                    embedding = model(face_tensor)
                    # Cosine similarity of the embedding to every class weight row.
                    logits = F.linear(F.normalize(embedding), normalized_weights)
                    pred_class = torch.argmax(logits, dim=1).item()

                label = class_names[pred_class]
                predictions.append(pred_class)
                # Snapshot taken before this face's box is drawn on the frame.
                last_face_image = frame.copy()

                cv2.rectangle(frame, (x, y), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f"{label}", (x, y-10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

            except Exception as e:
                # Best effort per face: one bad crop must not stop the stream.
                print(f"Error processing face: {e}")

        # ">=" rather than "==": several faces in one frame can push the count
        # from below the window straight past it, which "==" would never catch.
        if len(predictions) >= DECISION_WINDOW:
            is_unknown = classify_as_unknown(predictions[:DECISION_WINDOW], threshold=0.97)
            if not message_sent:
                caption = "🚨 Intruder detected!" if is_unknown else "🟢 Ally detected!"
                if last_face_image is not None:
                    send_telegram_photo(last_face_image, caption, BOT_TOKEN, CHAT_ID)
                else:
                    send_telegram_message(caption, BOT_TOKEN, CHAT_ID)
                message_sent = True
            predictions = []

        cv2.imshow("Face Classification", frame)
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()


  checkpoint = torch.load(r"D:\Projects\Sentinel turret rover\model\best_model_checkpoint.pth", map_location=torch.device('cpu'))


Prediction Confidence: 1.00
Prediction Confidence: 0.68


In [2]:
import cv2
import dlib
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from PIL import Image
from collections import Counter
import requests

# ========== Telegram Setup ==========
# SECURITY: a bot token is hard-coded below. Anything committed to source
# should be treated as leaked: revoke it with @BotFather and provide the
# replacement through the environment instead of editing this file.
import os

# Environment variables take precedence; the literals remain only as a
# backward-compatible fallback so existing runs behave exactly as before.
BOT_TOKEN = os.environ.get('TELEGRAM_BOT_TOKEN', '8111117866:AAGZI-zZLwbaGVqAIfzKlXclJobiMPtcZe0')  # Replace with your bot token
CHAT_ID = os.environ.get('TELEGRAM_CHAT_ID', '6855415361')  # Replace with your chat ID

def send_telegram_message(message, bot_token, chat_id, timeout=10):
    """Send a plain-text message through the Telegram Bot API (best effort).

    Args:
        message: Text to deliver.
        bot_token: Bot API token; interpolated into the request URL.
        chat_id: Identifier of the destination chat.
        timeout: Seconds to wait for Telegram before giving up. Without a
            timeout ``requests`` may block indefinitely, which would freeze
            the caller on a dead connection.

    Returns:
        None. Failures are printed rather than raised.
    """
    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
    payload = {'chat_id': chat_id, 'text': message}
    try:
        response = requests.post(url, data=payload, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to send message: {response.text}")
    except Exception as e:
        # Deliberately broad: a failed notification must not take down the caller.
        print(f"Telegram error: {e}")

def send_telegram_photo(image, caption, bot_token, chat_id, timeout=10):
    """Send an image with a caption through the Telegram Bot API (best effort).

    Args:
        image: Image array accepted by ``cv2.imencode``; it is JPEG-encoded in
            memory before upload.
        caption: Text shown with the photo.
        bot_token: Bot API token; interpolated into the request URL.
        chat_id: Identifier of the destination chat.
        timeout: Seconds to wait for Telegram before giving up, so a dead
            connection cannot block the caller indefinitely.

    Returns:
        None. Encoding and network failures are printed rather than raised.
    """
    url = f"https://api.telegram.org/bot{bot_token}/sendPhoto"
    try:
        # Encoding sits inside the try so a bad image is reported like any
        # other failure instead of escaping to the caller.
        encoded_ok, img_encoded = cv2.imencode('.jpg', image)
        if not encoded_ok:
            print("Failed to send photo: JPEG encoding failed")
            return
        files = {'photo': ('face.jpg', img_encoded.tobytes(), 'image/jpeg')}
        data = {'chat_id': chat_id, 'caption': caption}
        response = requests.post(url, files=files, data=data, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to send photo: {response.text}")
    except Exception as e:
        # Deliberately broad: a failed notification must not take down the caller.
        print(f"Telegram photo error: {e}")

# ========== Model Definitions ==========
class ArcMarginProduct(nn.Module):
    """Additive angular margin (ArcFace-style) classification head.

    The forward pass computes the cosine between the L2-normalised input and
    each L2-normalised class weight row, adds the margin ``m`` to the angle of
    the labelled class only, and scales every logit by ``s``.

    Args:
        in_features: Width of the incoming embedding.
        out_features: Number of classes (rows of ``weight``).
        s: Scale applied to all output logits.
        m: Angular margin in radians added to the target-class angle.
        easy_margin: If True, apply the margin only where ``cosine > 0`` and
            fall back to the plain cosine elsewhere.
    """

    def __init__(self, in_features, out_features, s=30.0, m=0.50, easy_margin=False):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m

        # The only learnable parameter; its name is kept so checkpoints load.
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        self.cos_m = torch.cos(torch.tensor(m))
        self.sin_m = torch.sin(torch.tensor(m))
        # th = cos(pi - m): below this cosine, theta + m would exceed pi.
        self.th = torch.cos(torch.tensor(torch.pi) - m)
        # Penalty subtracted from the cosine in that region instead.
        self.mm = torch.sin(torch.tensor(torch.pi) - m) * m

    def forward(self, input, label):
        """Return scaled logits with the margin applied at the ``label`` columns.

        Args:
            input: Batch of embeddings, one row per sample.
            label: Class index per sample; reshaped to a column and cast to long.

        Returns:
            Tensor with one row per sample and one column per class.
        """
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # Rounding can push |cosine| a hair above 1, making 1 - cos^2 negative
        # and the square root NaN; clamp the radicand into [0, 1] first.
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0.0, 1.0))
        # cos(theta + m) = cos(theta) cos(m) - sin(theta) sin(m)
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)

        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)

        # Margin logit for the target class, plain cosine for all others.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output

class CrossAttention(nn.Module):
    """Attention between two feature maps.

    Queries come from the first map, keys and values from the second. Each
    4-D map is flattened so that every spatial position becomes one token of
    width ``dim``. ``num_heads`` only influences the softmax scale; the
    channels are not split into separate heads.
    """

    def __init__(self, dim, num_heads=4):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = head_dim ** -0.5
        # One shared projection yields query, key and value side by side.
        self.qkv = nn.Linear(dim, dim * 3, bias=False)
        self.proj = nn.Linear(dim, dim)

    def forward(self, x1, x2):
        """Attend from ``x1`` (queries) to ``x2`` (keys/values); output has ``x1``'s shape."""
        batch, channels, height, width = x1.shape
        query_tokens = x1.flatten(2).transpose(1, 2)
        context_tokens = x2.flatten(2).transpose(1, 2)

        query, _, _ = self.qkv(query_tokens).chunk(3, dim=-1)
        _, key, value = self.qkv(context_tokens).chunk(3, dim=-1)

        scores = torch.matmul(query, key.transpose(-2, -1)) * self.scale
        weights = torch.softmax(scores, dim=-1)
        mixed = self.proj(torch.matmul(weights, value))
        # Back from (batch, tokens, channels) to the original map layout.
        return mixed.permute(0, 2, 1).reshape(batch, channels, height, width)

class ChannelAttention(nn.Module):
    """Gate a feature map with the sigmoid of its own self-attention output."""

    def __init__(self, in_channels):
        super().__init__()
        self.cross_attention = CrossAttention(in_channels)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` scaled element-wise by a gate in (0, 1)."""
        # Self-attention: the map serves as both query and key/value source.
        gate = self.sigmoid(self.cross_attention(x, x))
        return gate * x

class SpatialAttention(nn.Module):
    """Gate a feature map with a single-channel mask derived from self-attention."""

    def __init__(self, in_channels):
        super().__init__()
        self.cross_attention = CrossAttention(in_channels)
        # 7x7 convolution (padding 3 keeps H and W) collapsing all channels to one.
        self.conv = nn.Conv2d(in_channels, 1, kernel_size=7, padding=3)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` multiplied by a one-channel sigmoid mask (broadcast over channels)."""
        attended = self.cross_attention(x, x)
        mask = self.sigmoid(self.conv(attended))
        return mask * x

class CA_CBAM(nn.Module):
    """Apply channel attention and then spatial attention to a feature map."""

    def __init__(self, in_channels):
        super().__init__()
        self.channel_att = ChannelAttention(in_channels)
        self.spatial_att = SpatialAttention(in_channels)

    def forward(self, x):
        """Return the input after both attention gates, channel gate first."""
        return self.spatial_att(self.channel_att(x))

class FaceRecognitionModel(nn.Module):
    """Small CNN producing a 128-d face embedding, with a margin head for training.

    Three conv/batch-norm/ReLU/max-pool/dropout stages are followed by a
    CA_CBAM attention block and global average pooling; a linear layer then
    maps the pooled features to the embedding.
    """

    def __init__(self, num_classes):
        super().__init__()
        # (in_channels, out_channels, dropout probability) for each conv stage.
        stages = ((3, 32, 0.3), (32, 64, 0.3), (64, 128, 0.4))
        layers = []
        for c_in, c_out, drop_p in stages:
            layers.extend([
                nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2),
                nn.Dropout(drop_p),
            ])
        layers.append(CA_CBAM(128))
        layers.append(nn.AdaptiveAvgPool2d(1))
        # Flat Sequential: layer indices (and so state_dict keys) stay 0..16.
        self.feature_extractor = nn.Sequential(*layers)
        self.embedding = nn.Linear(128, 128)
        self.arc_margin = ArcMarginProduct(128, num_classes)

    def forward(self, x, label=None):
        """Return the embedding, or margin logits when ``label`` is given."""
        features = self.feature_extractor(x)
        embedding = self.embedding(features.view(features.size(0), -1))
        if label is None:
            return embedding
        return self.arc_margin(embedding, label)

# ========== Load Model ==========
# Number of identities the margin head was built for; class_names maps each
# predicted index to the label drawn on screen.
num_classes = 4
class_names = ['Class 0', 'Class 1', 'Class 2', 'Class 3']

model = FaceRecognitionModel(num_classes)
# Checkpoint is read from a fixed absolute path and mapped onto the CPU.
# NOTE(review): torch.load unpickles the file; consider weights_only=True if
# the checkpoint holds only tensors and plain containers - confirm first.
checkpoint = torch.load(r"D:\Projects\Sentinel turret rover\model\best_model_checkpoint.pth", map_location=torch.device('cpu'))
model.load_state_dict(checkpoint['model_state_dict'])
# Inference mode: dropout is disabled and batch norm uses its running statistics.
model.eval()

# ========== Transformations ==========
# Resize to 128x128, convert to a [0, 1] tensor, then shift/scale each
# channel with mean 0.5 and std 0.5 (i.e. into [-1, 1]).
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
])

# ========== Face Detection ==========
detector = dlib.get_frontal_face_detector()
# Capture device index 0.
# NOTE(review): cap.isOpened() is never checked; a missing camera only shows
# up as the first failed read in the loop below.
cap = cv2.VideoCapture(0)

def classify_as_unknown(predictions, threshold):
    """Report whether the predictions are too inconsistent to trust.

    Prints the share of ``predictions`` held by the most frequent class and
    returns True when that share is at or below ``threshold``.
    """
    _, top_count = Counter(predictions).most_common(1)[0]
    agreement = top_count / len(predictions)
    print(f"Prediction Confidence: {agreement:.2f}")
    return agreement <= threshold

# ========== Track Faces and Predictions ==========
# Number of predictions gathered per face key before a decision is made.
DECISION_WINDOW = 25
predictions_dict = {}    # face key -> class ids collected so far for that key
message_sent_dict = {}   # face key -> True once that key has been reported


def face_key(face):
    """Return the detection's box as a hashable (left, top, width, height) tuple.

    NOTE(review): the key is the exact box, so a face whose box shifts by a
    single pixel starts a new history - confirm this is the intended notion
    of "same face".
    """
    return (face.left(), face.top(), face.width(), face.height())


# The class weights do not change during inference, so normalise them once
# rather than once per detected face.
with torch.no_grad():
    normalized_weights = F.normalize(model.arc_margin.weight)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = detector(gray)
        frame_h, frame_w = frame.shape[:2]

        for face in faces:
            key = face_key(face)
            # Detector boxes can extend past the frame; a negative start index
            # would wrap the slice around, so clip the box to the image first.
            x = max(face.left(), 0)
            y = max(face.top(), 0)
            x2 = min(face.left() + face.width(), frame_w)
            y2 = min(face.top() + face.height(), frame_h)
            if x2 <= x or y2 <= y:
                continue
            face_img = frame[y:y2, x:x2]

            try:
                face_pil = Image.fromarray(cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB))
                face_tensor = transform(face_pil).unsqueeze(0)

                with torch.no_grad():
                    embedding = model(face_tensor)
                    # Cosine similarity of the embedding to every class weight row.
                    logits = F.linear(F.normalize(embedding), normalized_weights)
                    pred_class = torch.argmax(logits, dim=1).item()

                label = class_names[pred_class]

                # Keys that were already reported stop accumulating, so their
                # history cannot grow without bound.
                if not message_sent_dict.get(key, False):
                    history = predictions_dict.setdefault(key, [])
                    history.append(pred_class)
                    if len(history) >= DECISION_WINDOW:
                        # Copy the frame only when an alert is actually sent
                        # (and before this face's box is drawn), instead of
                        # retaining a full frame copy per key forever.
                        snapshot = frame.copy()
                        if classify_as_unknown(history, threshold=0.96):
                            send_telegram_photo(snapshot, "🚨 Intruder detected!", BOT_TOKEN, CHAT_ID)
                        else:
                            send_telegram_photo(snapshot, "🟢 Ally detected!", BOT_TOKEN, CHAT_ID)
                        message_sent_dict[key] = True
                        predictions_dict[key] = []

                cv2.rectangle(frame, (x, y), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f"{label}", (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

            except Exception as e:
                # Best effort per face: one bad crop must not stop the stream.
                print(f"Error processing face: {e}")

        cv2.imshow("Face Classification", frame)
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the window even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()


  checkpoint = torch.load(r"D:\Projects\Sentinel turret rover\model\best_model_checkpoint.pth", map_location=torch.device('cpu'))


Prediction Confidence: 1.00
Prediction Confidence: 1.00
Prediction Confidence: 1.00
