## Prepare tools

In [1]:
import torch
from torch.autograd import Variable
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init
from scipy.io import loadmat
import numpy as np
import pandas as pd
from pathlib import Path
from torch.utils.data import Dataset, DataLoader

In [2]:
torch.cuda.is_available()

n_gpu = torch.cuda.device_count()

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

t = torch.cuda.get_device_properties(0).total_memory
r = torch.cuda.memory_reserved(0)
a = torch.cuda.memory_allocated(0)
f = r-a  # free inside reserved

print("Number of GPU: ", n_gpu, type(device))
print("total GPU memory: ", t, " memory reserved: ", r, "memory allocated: ", a)

Number of GPU:  1 <class 'torch.device'>
total GPU memory:  15835660288  memory reserved:  0 memory allocated:  0


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Setup model

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        residual = x
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out

class RDNet(nn.Module):
    def __init__(self, num_face=2, num_dist=2, num_mask=2):
        super(RDNet, self).__init__()

        self.in_channels = 64
        self.conv1 = nn.Conv2d(1, self.in_channels, kernel_size=3, stride=2, padding=1)
        self.bn1 = nn.BatchNorm2d(self.in_channels)
        self.relu = nn.ReLU(inplace=True)

        # Adding more depth with Residual Blocks
        self.layer1 = self._make_layer(128, stride=2)
        self.layer2 = self._make_layer(256, stride=2)
        self.layer3 = self._make_layer(512, stride=2)
        self.drop = nn.Dropout(p=0.3)

        self.adaptivePool = nn.AdaptiveAvgPool2d((1, 1))

        # Increase model capacity in fully connected layers
        self.face_fc1 = nn.Linear(512, 2048)
        self.face_fc2 = nn.Linear(2048, 2048)
        self.face_fc3 = nn.Linear(2048, 1024)
        self.face_fc4 = nn.Linear(1024, 1024)
        self.face_fc5 = nn.Linear(1024, 1024)
        self.face_fc6 = nn.Linear(1024, 1024)
        self.face_fc7 = nn.Linear(1024, 1024)
        self.face_fc8 = nn.Linear(1024, 512)
        self.face_fc9 = nn.Linear(512, 512)
        self.face_fc10 = nn.Linear(512, num_face)

        self.dist_fc1 = nn.Linear(512 + num_face, 256)
        self.dist_fc2 = nn.Linear(256, 256)
        self.dist_fc3 = nn.Linear(256, 256)
        self.dist_fc4 = nn.Linear(256, 128)
        self.dist_fc5 = nn.Linear(128, num_dist)

        self.mask_fc1 = nn.Linear(512 + num_face, 256)
        self.mask_fc2 = nn.Linear(256, 256)
        self.mask_fc3 = nn.Linear(256, 256)
        self.mask_fc4 = nn.Linear(256, 128)
        self.mask_fc5 = nn.Linear(128, num_mask)

    def _make_layer(self, out_channels, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        layer = ResidualBlock(self.in_channels, out_channels, stride, downsample)
        self.in_channels = out_channels
        return layer

    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = self.drop(x)
        x = self.adaptivePool(x)
        x_cnn_output = x.view(x.size(0), -1)

        x_face = F.relu(self.face_fc1(x_cnn_output))
        x_face = F.relu(self.face_fc2(x_face))
        x_face = F.relu(self.face_fc3(x_face))
        x_face = F.relu(self.face_fc4(x_face))
        x_face = F.relu(self.face_fc5(x_face))
        x_face = F.relu(self.face_fc6(x_face))
        x_face = F.relu(self.face_fc7(x_face))
        x_face = F.relu(self.face_fc8(x_face))
        x_face = F.relu(self.face_fc9(x_face))
        x_face_output = torch.sigmoid(self.face_fc10(x_face))

        x_dist_input = torch.cat((x_cnn_output, x_face_output), 1)
        x_dist = F.relu(self.dist_fc1(x_dist_input))
        x_dist = F.relu(self.dist_fc2(x_dist))
        x_dist = F.relu(self.dist_fc3(x_dist))
        x_dist = F.relu(self.dist_fc4(x_dist))
        x_dist_output = torch.sigmoid(self.dist_fc5(x_dist))

        x_mask_input = torch.cat((x_cnn_output, x_face_output), 1)
        x_mask = F.relu(self.mask_fc1(x_mask_input))
        x_mask = F.relu(self.mask_fc2(x_mask))
        x_mask = F.relu(self.mask_fc3(x_mask))
        x_mask = F.relu(self.mask_fc4(x_mask))
        x_mask_output = torch.sigmoid(self.mask_fc5(x_mask))

        return [x_face_output, x_dist_output, x_mask_output]

model = RDNet().to(device)

# Calculate total parameters and model size in bytes
param_size = sum(p.numel() * p.element_size() for p in model.parameters())
buffer_size = sum(b.numel() * b.element_size() for b in model.buffers())
total_size = param_size + buffer_size


## Load model

In [5]:
model_load_path = './drive/MyDrive/AcFace_AE/RD-Net/Model/scalability/model_nu10.pth'  # The path where your model is saved
model.load_state_dict(torch.load(model_load_path))

<All keys matched successfully>

## Setup dataloader

In [6]:
class AudioFaceDataset(Dataset):
    def __init__(self, data_dir, split='train', transform=None, target_transform=None):
        self.data_dir = data_dir
        self.split = split
        self.transform = transform
        self.target_transform = target_transform
        self.all_labels = self.get_all_label_df()  # Get all labels without splitting
        self.labels = self.split_labels()  # Split the labels according to the specified split

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        row = self.labels.iloc[idx]
        label = row["label"]
        path = row["path"]
        data = self.read_mat_cnn(path)
        if self.transform:
            data = self.transform(data)
        if self.target_transform:
            label = self.target_transform(label)

        identifier = path

        return data, label, identifier

    @staticmethod
    def read_mat_cnn(file):
        data = loadmat(file)["mat_concat"]
        data_tmp = np.expand_dims(data, axis=0)
        return data_tmp.astype(np.float32)

    def list_all_mat_files(self):
        all_files = [str(x.absolute()) for x in Path(self.data_dir).glob("**/*.mat")]
        # print(f"Found {len(all_files)} .mat files in {self.data_dir}")
        return all_files

    def convert_path_to_label(self, path_str):
        label_start_idx = path_str.rfind('.mat')
        face_label = path_str[label_start_idx-3]
        mask_label = path_str[label_start_idx-2]
        dist_label = path_str[label_start_idx-1]
        return "_".join([face_label, dist_label, mask_label])

    def get_all_label_df(self):
        label_dict = {}
        for file in self.list_all_mat_files():
            label = self.convert_path_to_label(file)
            label_dict[file] = label

        label_df = pd.DataFrame.from_dict(label_dict, orient="index").reset_index().rename(columns={"index": "path", 0: "label"})
        return label_df

    def split_labels(self):
        all_labels_shuffled = self.all_labels.sample(frac=1).reset_index(drop=True)  # Ensure reproducibility with random_state
        if self.split == 'train':
            return all_labels_shuffled.sample(frac=0.8)  # Use all data for training
        elif self.split == 'test':
            return all_labels_shuffled.sample(frac=1)  # Use 20% of the data for testing
        else:
            raise ValueError("Split must be 'train' or 'test'.")


## Load data and test - 10 users

In [7]:
test_dir = './drive/MyDrive/AcFace_AE/RD-Net/Dataset/Scalability/NU10'

batch_size = 128
data_test = AudioFaceDataset(test_dir, split='test')
data_test_loader = DataLoader(dataset=data_test,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=8)

print("Data loader setup complete.")

Data loader setup complete.




In [8]:
import torch
import numpy as np
import time
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np

criterion = torch.nn.CrossEntropyLoss()    # Softmax is internally computed.

model.eval()

acc_list = []
cost_list = []
incorrect_samples = []
predictions = []
true_labels = []

for i, (test_X, test_Y, sample_ids) in enumerate(data_test_loader):
    face_Y, dist_Y, mask_Y = [], [], []
    for Y_i in test_Y:
        underline_idx = Y_i.find("_")
        face_Y.append(int(Y_i[underline_idx-1]))
        dist_Y.append(int(Y_i[underline_idx+1]))
        mask_Y.append(int(Y_i[underline_idx+3]))

    X = test_X.to(device)
    face_Y = torch.LongTensor(face_Y).to(device)
    dist_Y = torch.LongTensor(dist_Y).to(device)
    mask_Y = torch.LongTensor(mask_Y).to(device)

    with torch.no_grad():
        output = model(X)

        cost_face = criterion(output[0], face_Y)
        cost_dist = criterion(output[1], dist_Y)
        cost_mask = criterion(output[2], mask_Y)
        cost = cost_face - 0.015 * cost_dist - 0.01 * cost_mask

        accuracy = (torch.max(output[0], 1)[1] == face_Y).float().mean().item()

        acc_list.append(accuracy)
        cost_list.append(cost.item())

        predictions.extend(torch.max(output[0], 1)[1].cpu().numpy())
        true_labels.extend(face_Y.cpu().numpy())

        print(f'Batch {i} averaged accuracy: {accuracy*100:.2f} %')

        incorrect_predictions = (torch.max(output[0], 1)[1] != face_Y)
        incorrect_indices = [i for i, x in enumerate(incorrect_predictions) if x]
        incorrect_samples.extend([sample_ids[idx] for idx in incorrect_indices])

if acc_list:  # Check if acc_list is not empty
    print('\nAveraged Accuracy: {:2.2f} %'.format(np.mean(acc_list) * 100))
else:
    raise Exception("\nNo valid accuracy computations were performed.")



Batch 0 averaged accuracy: 96.09 %
Batch 1 averaged accuracy: 97.66 %
Batch 2 averaged accuracy: 99.22 %
Batch 3 averaged accuracy: 96.09 %
Batch 4 averaged accuracy: 96.09 %
Batch 5 averaged accuracy: 96.88 %
Batch 6 averaged accuracy: 98.44 %
Batch 7 averaged accuracy: 97.66 %
Batch 8 averaged accuracy: 94.53 %
Batch 9 averaged accuracy: 97.66 %
Batch 10 averaged accuracy: 97.66 %
Batch 11 averaged accuracy: 98.44 %
Batch 12 averaged accuracy: 97.66 %
Batch 13 averaged accuracy: 100.00 %
Batch 14 averaged accuracy: 95.31 %
Batch 15 averaged accuracy: 98.44 %
Batch 16 averaged accuracy: 98.44 %
Batch 17 averaged accuracy: 96.88 %
Batch 18 averaged accuracy: 97.66 %
Batch 19 averaged accuracy: 97.66 %
Batch 20 averaged accuracy: 95.31 %
Batch 21 averaged accuracy: 96.88 %
Batch 22 averaged accuracy: 96.88 %
Batch 23 averaged accuracy: 96.88 %
Batch 24 averaged accuracy: 96.88 %

Averaged Accuracy: 97.25 %


## Load data and test - 11 users

In [9]:
test_dir = './drive/MyDrive/AcFace_AE/RD-Net/Dataset/Scalability/NU11'

batch_size = 128
data_test = AudioFaceDataset(test_dir, split='test')
data_test_loader = DataLoader(dataset=data_test,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=8)

print("Data loader setup complete.")

Data loader setup complete.




In [10]:
import torch
import numpy as np
import time
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np

criterion = torch.nn.CrossEntropyLoss()    # Softmax is internally computed.

model.eval()

acc_list = []
cost_list = []
incorrect_samples = []
predictions = []
true_labels = []

for i, (test_X, test_Y, sample_ids) in enumerate(data_test_loader):
    face_Y, dist_Y, mask_Y = [], [], []
    for Y_i in test_Y:
        underline_idx = Y_i.find("_")
        face_Y.append(int(Y_i[underline_idx-1]))
        dist_Y.append(int(Y_i[underline_idx+1]))
        mask_Y.append(int(Y_i[underline_idx+3]))

    X = test_X.to(device)
    face_Y = torch.LongTensor(face_Y).to(device)
    dist_Y = torch.LongTensor(dist_Y).to(device)
    mask_Y = torch.LongTensor(mask_Y).to(device)

    with torch.no_grad():
        output = model(X)

        cost_face = criterion(output[0], face_Y)
        cost_dist = criterion(output[1], dist_Y)
        cost_mask = criterion(output[2], mask_Y)
        cost = cost_face - 0.015 * cost_dist - 0.01 * cost_mask

        accuracy = (torch.max(output[0], 1)[1] == face_Y).float().mean().item()

        acc_list.append(accuracy)
        cost_list.append(cost.item())

        predictions.extend(torch.max(output[0], 1)[1].cpu().numpy())
        true_labels.extend(face_Y.cpu().numpy())

        print(f'Batch {i} averaged accuracy: {accuracy*100:.2f} %')

        incorrect_predictions = (torch.max(output[0], 1)[1] != face_Y)
        incorrect_indices = [i for i, x in enumerate(incorrect_predictions) if x]
        incorrect_samples.extend([sample_ids[idx] for idx in incorrect_indices])

if acc_list:  # Check if acc_list is not empty
    print('\nAveraged Accuracy: {:2.2f} %'.format(np.mean(acc_list) * 100))
else:
    raise Exception("\nNo valid accuracy computations were performed.")

Batch 0 averaged accuracy: 93.75 %
Batch 1 averaged accuracy: 97.66 %
Batch 2 averaged accuracy: 98.44 %
Batch 3 averaged accuracy: 97.66 %

Averaged Accuracy: 96.88 %


## Load data and test - 12 users

In [11]:
test_dir = './drive/MyDrive/AcFace_AE/RD-Net/Dataset/Scalability/NU12'

batch_size = 128
data_test = AudioFaceDataset(test_dir, split='test')
data_test_loader = DataLoader(dataset=data_test,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=8)

print("Data loader setup complete.")

Data loader setup complete.




In [12]:
import torch
import numpy as np
import time
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np

criterion = torch.nn.CrossEntropyLoss()    # Softmax is internally computed.

model.eval()

acc_list = []
cost_list = []
incorrect_samples = []
predictions = []
true_labels = []

for i, (test_X, test_Y, sample_ids) in enumerate(data_test_loader):
    face_Y, dist_Y, mask_Y = [], [], []
    for Y_i in test_Y:
        underline_idx = Y_i.find("_")
        face_Y.append(int(Y_i[underline_idx-1]))
        dist_Y.append(int(Y_i[underline_idx+1]))
        mask_Y.append(int(Y_i[underline_idx+3]))

    X = test_X.to(device)
    face_Y = torch.LongTensor(face_Y).to(device)
    dist_Y = torch.LongTensor(dist_Y).to(device)
    mask_Y = torch.LongTensor(mask_Y).to(device)

    with torch.no_grad():
        output = model(X)

        cost_face = criterion(output[0], face_Y)
        cost_dist = criterion(output[1], dist_Y)
        cost_mask = criterion(output[2], mask_Y)
        cost = cost_face - 0.015 * cost_dist - 0.01 * cost_mask

        accuracy = (torch.max(output[0], 1)[1] == face_Y).float().mean().item()

        acc_list.append(accuracy)
        cost_list.append(cost.item())

        predictions.extend(torch.max(output[0], 1)[1].cpu().numpy())
        true_labels.extend(face_Y.cpu().numpy())

        print(f'Batch {i} averaged accuracy: {accuracy*100:.2f} %')

        incorrect_predictions = (torch.max(output[0], 1)[1] != face_Y)
        incorrect_indices = [i for i, x in enumerate(incorrect_predictions) if x]
        incorrect_samples.extend([sample_ids[idx] for idx in incorrect_indices])

if acc_list:  # Check if acc_list is not empty
    print('\nAveraged Accuracy: {:2.2f} %'.format(np.mean(acc_list) * 100))
else:
    raise Exception("\nNo valid accuracy computations were performed.")

Batch 0 averaged accuracy: 95.31 %
Batch 1 averaged accuracy: 93.75 %
Batch 2 averaged accuracy: 94.53 %
Batch 3 averaged accuracy: 96.09 %
Batch 4 averaged accuracy: 95.31 %
Batch 5 averaged accuracy: 95.65 %

Averaged Accuracy: 95.11 %


## Load data and test - 13 users

In [13]:
test_dir = './drive/MyDrive/AcFace_AE/RD-Net/Dataset/Scalability/NU13'

batch_size = 128
data_test = AudioFaceDataset(test_dir, split='test')
data_test_loader = DataLoader(dataset=data_test,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=8)

print("Data loader setup complete.")

Data loader setup complete.




In [14]:
import torch
import numpy as np
import time
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np

criterion = torch.nn.CrossEntropyLoss()

model.eval()

acc_list = []
cost_list = []
incorrect_samples = []
predictions = []
true_labels = []

for i, (test_X, test_Y, sample_ids) in enumerate(data_test_loader):
    face_Y, dist_Y, mask_Y = [], [], []
    for Y_i in test_Y:
        underline_idx = Y_i.find("_")
        face_Y.append(int(Y_i[underline_idx-1]))
        dist_Y.append(int(Y_i[underline_idx+1]))
        mask_Y.append(int(Y_i[underline_idx+3]))

    X = test_X.to(device)
    face_Y = torch.LongTensor(face_Y).to(device)
    dist_Y = torch.LongTensor(dist_Y).to(device)
    mask_Y = torch.LongTensor(mask_Y).to(device)

    with torch.no_grad():
        output = model(X)

        cost_face = criterion(output[0], face_Y)
        cost_dist = criterion(output[1], dist_Y)
        cost_mask = criterion(output[2], mask_Y)
        cost = cost_face - 0.015 * cost_dist - 0.01 * cost_mask

        accuracy = (torch.max(output[0], 1)[1] == face_Y).float().mean().item()

        acc_list.append(accuracy)
        cost_list.append(cost.item())

        predictions.extend(torch.max(output[0], 1)[1].cpu().numpy())
        true_labels.extend(face_Y.cpu().numpy())

        print(f'Batch {i} averaged accuracy: {accuracy*100:.2f} %')

        incorrect_predictions = (torch.max(output[0], 1)[1] != face_Y)
        incorrect_indices = [i for i, x in enumerate(incorrect_predictions) if x]
        incorrect_samples.extend([sample_ids[idx] for idx in incorrect_indices])

if acc_list:  # Check if acc_list is not empty
    print('\nAveraged Accuracy: {:2.2f} %'.format(np.mean(acc_list) * 100))
else:
    raise Exception("\nNo valid accuracy computations were performed.")

Batch 0 averaged accuracy: 95.31 %
Batch 1 averaged accuracy: 95.31 %
Batch 2 averaged accuracy: 93.75 %
Batch 3 averaged accuracy: 96.09 %
Batch 4 averaged accuracy: 94.79 %

Averaged Accuracy: 95.05 %


## Load data and test - 14 users

In [15]:
test_dir = './drive/MyDrive/AcFace_AE/RD-Net/Dataset/Scalability/NU14'

batch_size = 128
data_test = AudioFaceDataset(test_dir, split='test')
data_test_loader = DataLoader(dataset=data_test,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=8)

print("Data loader setup complete.")

Data loader setup complete.




In [16]:
import torch
import numpy as np
import time
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np

criterion = torch.nn.CrossEntropyLoss()    # Softmax is internally computed.

model.eval()

acc_list = []
cost_list = []
incorrect_samples = []
predictions = []
true_labels = []

for i, (test_X, test_Y, sample_ids) in enumerate(data_test_loader):
    face_Y, dist_Y, mask_Y = [], [], []
    for Y_i in test_Y:
        underline_idx = Y_i.find("_")
        face_Y.append(int(Y_i[underline_idx-1]))
        dist_Y.append(int(Y_i[underline_idx+1]))
        mask_Y.append(int(Y_i[underline_idx+3]))

    X = test_X.to(device)
    face_Y = torch.LongTensor(face_Y).to(device)
    dist_Y = torch.LongTensor(dist_Y).to(device)
    mask_Y = torch.LongTensor(mask_Y).to(device)

    with torch.no_grad():
        output = model(X)

        cost_face = criterion(output[0], face_Y)
        cost_dist = criterion(output[1], dist_Y)
        cost_mask = criterion(output[2], mask_Y)
        cost = cost_face - 0.015 * cost_dist - 0.01 * cost_mask

        accuracy = (torch.max(output[0], 1)[1] == face_Y).float().mean().item()

        acc_list.append(accuracy)
        cost_list.append(cost.item())

        predictions.extend(torch.max(output[0], 1)[1].cpu().numpy())
        true_labels.extend(face_Y.cpu().numpy())

        print(f'Batch {i} averaged accuracy: {accuracy*100:.2f} %')

        incorrect_predictions = (torch.max(output[0], 1)[1] != face_Y)
        incorrect_indices = [i for i, x in enumerate(incorrect_predictions) if x]
        incorrect_samples.extend([sample_ids[idx] for idx in incorrect_indices])

if acc_list:  # Check if acc_list is not empty
    print('\nAveraged Accuracy: {:2.2f} %'.format(np.mean(acc_list) * 100))
else:
    raise Exception("\nNo valid accuracy computations were performed.")

Batch 0 averaged accuracy: 96.09 %
Batch 1 averaged accuracy: 96.09 %
Batch 2 averaged accuracy: 95.31 %
Batch 3 averaged accuracy: 96.88 %
Batch 4 averaged accuracy: 96.09 %
Batch 5 averaged accuracy: 100.00 %

Averaged Accuracy: 96.74 %


## Load data and test - 15 users

In [17]:
test_dir = './drive/MyDrive/AcFace_AE/RD-Net/Dataset/Scalability/NU15'

batch_size = 128
data_test = AudioFaceDataset(test_dir, split='test')
data_test_loader = DataLoader(dataset=data_test,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=8)

print("Data loader setup complete.")

Data loader setup complete.




In [18]:
import torch
import numpy as np
import time
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np

criterion = torch.nn.CrossEntropyLoss()    # Softmax is internally computed.

model.eval()

acc_list = []
cost_list = []
incorrect_samples = []
predictions = []
true_labels = []

for i, (test_X, test_Y, sample_ids) in enumerate(data_test_loader):
    face_Y, dist_Y, mask_Y = [], [], []
    for Y_i in test_Y:
        underline_idx = Y_i.find("_")
        face_Y.append(int(Y_i[underline_idx-1]))
        dist_Y.append(int(Y_i[underline_idx+1]))
        mask_Y.append(int(Y_i[underline_idx+3]))

    X = test_X.to(device)
    face_Y = torch.LongTensor(face_Y).to(device)
    dist_Y = torch.LongTensor(dist_Y).to(device)
    mask_Y = torch.LongTensor(mask_Y).to(device)

    with torch.no_grad():
        output = model(X)

        cost_face = criterion(output[0], face_Y)
        cost_dist = criterion(output[1], dist_Y)
        cost_mask = criterion(output[2], mask_Y)
        cost = cost_face - 0.015 * cost_dist - 0.01 * cost_mask

        accuracy = (torch.max(output[0], 1)[1] == face_Y).float().mean().item()

        acc_list.append(accuracy)
        cost_list.append(cost.item())

        predictions.extend(torch.max(output[0], 1)[1].cpu().numpy())
        true_labels.extend(face_Y.cpu().numpy())

        print(f'Batch {i} averaged accuracy: {accuracy*100:.2f} %')

        incorrect_predictions = (torch.max(output[0], 1)[1] != face_Y)
        incorrect_indices = [i for i, x in enumerate(incorrect_predictions) if x]
        incorrect_samples.extend([sample_ids[idx] for idx in incorrect_indices])

if acc_list:  # Check if acc_list is not empty
    print('\nAveraged Accuracy: {:2.2f} %'.format(np.mean(acc_list) * 100))
else:
    raise Exception("\nNo valid accuracy computations were performed.")

Batch 0 averaged accuracy: 96.88 %
Batch 1 averaged accuracy: 96.09 %
Batch 2 averaged accuracy: 97.66 %
Batch 3 averaged accuracy: 96.09 %
Batch 4 averaged accuracy: 94.53 %
Batch 5 averaged accuracy: 95.31 %

Averaged Accuracy: 96.09 %
