In [1]:
%load_ext autoreload
%autoreload 2
    
import os
from pathlib import Path
import pandas as pd

import torch
from models.gaze_model import FineTuneModel, FaceModel, EyesModel, FaceGridModel
from dataset.dataset import GazeDetectionDataset
from facemesh import FaceMeshBlock, FaceMesh
from pupil_detection import IrisLM, IrisBlock
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import mean_absolute_percentage_error as mape
import numpy as np
from tqdm import tqdm

In [2]:
def train(model: FineTuneModel, dataloader_train: DataLoader):
    total_loss = 0.0
    model.train()
    preds_list = []
    labels_list = []
    for i, data in enumerate(dataloader_train):
        inputs, labels, inputs_eye_l, inputs_eye_r, inputs_mask = data['image'], data['coordinates'], \
                                                     data['eye_l'], data['eye_r'], data['face_mask']

        optimizer.zero_grad()

        outputs = model(inputs, inputs_eye_l, inputs_eye_r, inputs_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
        optimizer.step()
        preds_list += outputs.cpu().detach().tolist()
        labels_list += labels.cpu().detach().tolist()
        total_loss += loss.cpu().item()

    loss = total_loss / (i + 1)
    mape_value = mape(labels_list, preds_list)
    return loss, mape_value

In [3]:
def eval(model: FineTuneModel, dataloader_val: DataLoader):
    total_loss = 0.0
    model.eval()
    preds = []
    preds_list = []
    labels_list = []
    for i, data in enumerate(dataloader_val):
        inputs, labels, inputs_eye_l, inputs_eye_r, inputs_mask = data['image'], data['coordinates'], \
                                                     data['eye_l'], data['eye_r'], data['face_mask']
        with torch.no_grad():
            outputs = model(inputs, inputs_eye_l, inputs_eye_r, inputs_mask)
        loss = criterion(outputs, labels)
        total_loss += loss.cpu().item()
        preds_list += outputs.cpu().detach().tolist()
        labels_list += labels.cpu().detach().tolist()
        
    loss = total_loss / (i + 1)
    mape_value = mape(labels_list, preds_list)
    return loss, mape_value

In [4]:
class RMSELoss(nn.Module):
    def __init__(self, eps=1e-6):
        super().__init__()
        self.mse = nn.MSELoss()
        self.eps = eps
        
    def forward(self,yhat,y):
        loss = torch.sqrt(self.mse(yhat,y) + self.eps)
        return loss

In [5]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"

In [6]:
base_folder = "./real_experiment/calibration_dataset/"
frames_folder = "frames"
annotations_folder = "annotations"
frames_folders_path = os.path.join(base_folder, frames_folder)
dfs = []
for frames_name in tqdm(os.listdir(frames_folders_path)):
    ann_path = os.path.join(base_folder, annotations_folder, f"{frames_name}.txt").replace("frames", "points")
    frames_path = os.path.join(frames_folders_path, frames_name)
    p = Path(frames_path).glob('*.png')
    paths = [str(path.absolute()) for path in p]
    df_files = pd.DataFrame({"paths": paths})
    df_files["ind"] = df_files.paths.apply(lambda x: Path(x).stem)
    df = pd.read_csv(
        ann_path,
        sep = " ",
        header=None
    )
    cols = [
        "timestamp", "x_gt", "y_gt", "x1", "y1",
        "x2", "y2", "screen_w", "screen_h"
    ]
    df.columns = cols
    
    df["x_normalized"] = df["x_gt"] / df["screen_w"]
    df["y_normalized"] = df["y_gt"] / df["screen_h"]
    df["timestamp"] = df["timestamp"].apply(str)
    full_df = df_files.merge(df, left_on="ind", right_on="timestamp").drop(columns = ["ind"])
    dfs.append(full_df)

100%|████████████████████████████████████████████| 8/8 [00:00<00:00, 108.59it/s]


In [7]:
# frames_folder = "./real_experiment/calibration_dataset/frames/"
# p = Path(frames_folder).glob('*.png')
# paths = [str(path.absolute()) for path in p]
# df_files = pd.DataFrame({"paths": paths})
# df_files["ind"] = df_files.paths.apply(lambda x: Path(x).stem)

In [8]:
# df = pd.read_csv(
#     "./real_experiment/points_train.txt",
#     sep = " ",
#     header=None
# )
# cols = [
#     "timestamp", "x_gt", "y_gt", "x1", "y1",
#     "x2", "y2", "screen_w", "screen_h"
# ]
# df.columns = cols

# df["x_normalized"] = df["x_gt"] / df["screen_w"]
# df["y_normalized"] = df["y_gt"] / df["screen_h"]
# df["timestamp"] = df["timestamp"].apply(str)
# df.head()

In [9]:
# full_df = df_files.merge(df, left_on="ind", right_on="timestamp").drop(columns = ["ind"])

In [10]:
full_df = pd.concat(dfs)

In [11]:
full_df.shape

(855, 12)

In [12]:
full_df["participant_name"] = full_df["paths"].apply(lambda x: x.split("/")[-2].split("_")[2])

In [13]:
full_df_train = full_df[full_df["participant_name"] != "marina"]

In [14]:
test_diff_person = full_df[full_df["participant_name"] == "marina"]

In [15]:
full_df_train = pd.read_csv("real_experiment/calibration_dataset/cleaned_train.csv")

In [16]:
full_df_train.head()

Unnamed: 0,paths,timestamp,screen_w,screen_h,participant_name,...,y1,x2,y2,x_normalized,y_normalized
0,/home/ubuntu/projects/tweakle/gaze_detection/r...,1697561000.0,2474,1520,misha,...,1045.666667,1010.333333,1205.666667,0.376044,0.74057
1,/home/ubuntu/projects/tweakle/gaze_detection/r...,1697561000.0,2474,1520,misha,...,1342.5,2451.5,1497.0,0.960287,0.935855
2,/home/ubuntu/projects/tweakle/gaze_detection/r...,1697561000.0,2474,1520,misha,...,386.333333,978.666667,546.333333,0.363244,0.306798
3,/home/ubuntu/projects/tweakle/gaze_detection/r...,1697561000.0,2474,1520,misha,...,1125.0,790.5,1285.0,0.287187,0.792763
4,/home/ubuntu/projects/tweakle/gaze_detection/r...,1697561000.0,2474,1520,misha,...,909.0,2221.666667,1069.0,0.86567,0.650658


In [17]:
NUM_SAMPLES = None
BATCH_SIZE = 1024
LEARNING_RATE = 1e-3
REDUCE_FACTOR = 0.5
PATIENCE = 10
NUM_EPOCHS = 100
WEIGHT_DECAY = 1e-4
CHECKPOINTS_PATH = "./checkpoints/"
EXPERIMENT_NAME = "calibration_unfreezed_guys_cleaned_weighted_loss_more_compl_more_augm"
LOSS_WEIGHTS = [0.3, 0.7]

In [18]:
train_df, test_df = train_test_split(full_df_train.head(NUM_SAMPLES), test_size = 0.1, random_state=42, shuffle=True)
train_df, val_df = train_test_split(full_df_train, test_size = 0.1, random_state=42, shuffle=True)

In [19]:
augmentations = A.Compose(
    [
        A.RGBShift(r_shift_limit=15, g_shift_limit=15, b_shift_limit=15, p=0.5),
        A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.5),
        A.Blur(p=0.3),
        A.CLAHE(p=0.1),
        A.RandomGamma(p=0.5),
        A.ImageCompression(quality_lower=75, p=0.5),
        A.MotionBlur(p=0.5)
    ]
)

In [20]:
trans_list = [A.Resize(192, 192)]

In [21]:
trans_list = [A.Resize(192, 192)]
dataset_train = GazeDetectionDataset(data = train_df, transform_list=trans_list,
                                     to_tensors=True, device=device, screen_features=False,
                                     transform=augmentations, augmentation_factor = 5)
dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE,
                        shuffle=True, num_workers=0)
dataset_val = GazeDetectionDataset(data = val_df, transform_list=trans_list,
                                   to_tensors=True, device=device, screen_features=False)
dataloader_val = DataLoader(dataset_val, batch_size=BATCH_SIZE,
                        shuffle=False, num_workers=0)

Fusing layers... 
Fusing layers... 


In [45]:
class RMSELoss(nn.Module):
    def __init__(self, device = "cuda:0", eps=1e-16, weights = None):
        super().__init__()
        self.eps = eps
        self.weights = weights
        self.device = device

    def __mse_loss(self, input, target, weights):
        if weights is not None:
            weights_tensor = torch.from_numpy(np.array(weights)).to(device)
            return torch.sum(weights_tensor * (input - target) ** 2) / weights_tensor.sum()
        return torch.mean((input - target) ** 2)
        
    def forward(self,yhat,y):
        loss = torch.sqrt(self.__mse_loss(yhat, y, self.weights) + self.eps)
        return loss

In [46]:
class EyesModel(nn.Module):
    def __init__(self, pretrained_model_eyes: nn.Module):
        super(EyesModel, self).__init__()
        self.backbone = pretrained_model_eyes.backbone
        self.regression_head_eyes = nn.Sequential(
            IrisBlock(128, 128), IrisBlock(128, 128),
            IrisBlock(128, 128, stride=2),
            IrisBlock(128, 128), IrisBlock(128, 128),
            IrisBlock(128, 128, stride=2),
            IrisBlock(128, 128), IrisBlock(128, 128),
        )
        # connect eyes
        self.fc = nn.Sequential(
            nn.Linear(2 * 128 * 1 * 1, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 128),
            nn.ReLU(inplace=True),
        )

    def forward(self, x_eye_l, x_eye_r):
        x_eye_l = self.backbone(x_eye_l)
        x_eye_l = self.regression_head_eyes(x_eye_l)
        x_eye_l = x_eye_l.view(-1, 128 * 1 * 1)

        x_eye_r = self.backbone(x_eye_r)
        x_eye_r = self.regression_head_eyes(x_eye_r)
        x_eye_r = x_eye_r.view(-1, 128 * 1 * 1)
        x = torch.cat([x_eye_l, x_eye_r], 1)
        x = self.fc(x)
        return x

In [47]:
class FaceModel(nn.Module):
    def __init__(self, pretrained_model_face: nn.Module):
        super(FaceModel, self).__init__()
        self.backbone = pretrained_model_face.backbone
        self.regression_head_face = nn.Sequential(
            FaceMeshBlock(128, 128, stride=2),
            FaceMeshBlock(128, 128),
            FaceMeshBlock(128, 128),
            FaceMeshBlock(128, 128),
            FaceMeshBlock(128, 128),
            nn.Conv2d(128, 32, 1),
            nn.PReLU(32),
            FaceMeshBlock(32, 32),
        )
        self.fc = nn.Sequential(
            nn.Linear(32 * 3 * 3, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 64),
            nn.ReLU(inplace=True),
        )

    def _preprocess(self, x):
        return x.to(torch.float32) * 2.0 - 1.0

    def forward(self, x_face):
        x_face = self._preprocess(x_face)
        x_face = nn.ReflectionPad2d((1, 0, 1, 0))(x_face)
        x_face = self.backbone(x_face)
        x_face = self.regression_head_face(x_face)
        x_face = x_face.view(-1, 32 * 3 * 3)
        x = self.fc(x_face)
        return x

In [48]:
class FineTuneModel(nn.Module):
    def __init__(
        self,
        pretrained_model_face: nn.Module,
        pretrained_model_eyes: nn.Module,
        screen_features: bool = False,
    ):
        super(FineTuneModel, self).__init__()
        self.face_model = FaceModel(pretrained_model_face)
        self.eyes_model = EyesModel(pretrained_model_eyes)
        self.face_grid_model = FaceGridModel()
        self.screen_features = screen_features
        if not screen_features:
            self.fc = nn.Sequential(
                nn.Linear(128+64+128, 128),
                nn.ReLU(inplace=True),
                nn.Linear(128, 64),
                nn.ReLU(inplace=True),
                nn.Linear(64, 32),
                nn.ReLU(inplace=True),
                nn.Linear(32, 2),
            )
        else:
            self.fc1 = nn.Sequential(
                nn.Linear(128+64+128, 128),
                nn.ReLU(inplace=True),
                nn.Linear(128, 13),
                nn.ReLU(inplace=True),
            )
            self.layer_norm = nn.LayerNorm(16)
            self.fc2 = nn.Linear(16, 2)
            

    def _preprocess(self, x):
        return x.to(torch.float32) * 2.0 - 1.0
        
    def forward(self, x_face, x_eye_l, x_eye_r, x_grid, x_screen = None):
        if self.screen_features and x_screen is None:
            raise Exception("You should pass screen features")
        if not self.screen_features and x_screen is not None:
            warnings.warn("Screen fearures won't be used")
        x_eyes = self.eyes_model(x_eye_l, x_eye_r)
        x_face = self.face_model(x_face)
        x_grid = self.face_grid_model(x_grid)
        x = torch.cat([x_eyes, x_face, x_grid], axis = 1)
        if not self.screen_features:
            x = self.fc(x)
        else:
            x = self.fc1(x)
            x = torch.cat([x, x_screen], axis = 1)
            x = self.layer_norm(x)
            x = self.fc2(x)
        return x

In [26]:
from comet_ml import Experiment
from comet_ml.integration.pytorch import log_model

experiment = Experiment(
  api_key="4qtNKAjcucKnOrwC4pRvPaHRv",
  project_name="tweakle-gaze-calibration",
  workspace="kmisterios"
)

[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/kmisterios/tweakle-gaze-calibration/6b3f6544f913427595ca024dc7b85b6b



In [27]:
experiment.set_name(f"{EXPERIMENT_NAME}")

In [28]:
pretrained_model_face = FaceMesh()
pretrained_model_face.load_weights("./weights/facemesh.pth")

model_path = "./weights/irislandmarks.pth"
pretrained_model_eyes = IrisLM()
weights = torch.load(model_path)
pretrained_model_eyes.load_state_dict(weights)

<All keys matched successfully>

In [29]:
CHECKPOINTS_PATH = "./checkpoints"
EXPERIMENT_NAME_ORIG = "face_eyes_mask_more_layers_more_patience_weighted_loss_more_complexity_tune"

In [30]:
model = FineTuneModel(pretrained_model_face, pretrained_model_eyes, screen_features=False).to(device)
model.load_state_dict(torch.load(os.path.join(CHECKPOINTS_PATH, f"best_{EXPERIMENT_NAME_ORIG}.pt")))
model.train()

FineTuneModel(
  (face_model): FaceModel(
    (backbone): Sequential(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2))
      (1): PReLU(num_parameters=16)
      (2): FaceMeshBlock(
        (convs): Sequential(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16)
          (1): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1))
        )
        (act): PReLU(num_parameters=16)
      )
      (3): FaceMeshBlock(
        (convs): Sequential(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16)
          (1): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1))
        )
        (act): PReLU(num_parameters=16)
      )
      (4): FaceMeshBlock(
        (max_pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (convs): Sequential(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), groups=16)
          (1): Conv2d(16, 32, kernel_size=(1, 1), stride=(1, 1))
   

In [31]:
# for param in model.parameters():
#     param.requires_grad = False

# for param in model.fc.parameters():
#     param.requires_grad = True

In [32]:
criterion = RMSELoss(weights = LOSS_WEIGHTS)
optimizer = optim.Adam(model.parameters(), lr = LEARNING_RATE, weight_decay = WEIGHT_DECAY)
scheduler = ReduceLROnPlateau(optimizer, factor= REDUCE_FACTOR, patience=PATIENCE)

In [33]:
warmup = torch.optim.lr_scheduler.LinearLR(
    optimizer,
    start_factor=0.001,
    end_factor=1.0,
    total_iters=2,
)

In [None]:
val_loss_min = np.inf
epoch_save = None

for epoch in range(NUM_EPOCHS):
    train_loss, train_mape = train(model, dataloader_train)
    val_loss, val_mape = eval(model, dataloader_val)
    if epoch <= 2:
        warmup.step()
        current_lr = warmup.optimizer.param_groups[0]['lr']
    else:
        scheduler.step(val_loss)
        current_lr = scheduler.optimizer.param_groups[0]['lr']
    if val_loss_min > val_loss:
        val_loss_min = val_loss
        torch.save(model.state_dict(), os.path.join(CHECKPOINTS_PATH, f"best_{EXPERIMENT_NAME}.pt"))
        epoch_save = epoch

    print()
    print(f'Epoch: {epoch}: Train loss: {round(train_loss, 3)}; Val loss: {round(val_loss, 3)};')
    if epoch_save is not None:
        print(f'Best model saved on epoch {epoch_save}')
    experiment.log_metrics({
        "rmse_val": val_loss,
        "rmse_train": train_loss,
        "best_model_epoch": epoch_save,
        "train_mape": train_mape,
        "val_mape": val_mape,
        "epoch": epoch,
        "lr": current_lr
    })
experiment.end()


Epoch: 0: Train loss: 8.702; Val loss: 1.886;
Best model saved on epoch 0

Epoch: 1: Train loss: 8.024; Val loss: 1.73;
Best model saved on epoch 1

Epoch: 2: Train loss: 6.703; Val loss: 1.548;
Best model saved on epoch 2

Epoch: 3: Train loss: 5.861; Val loss: 1.425;
Best model saved on epoch 3

Epoch: 4: Train loss: 5.284; Val loss: 1.35;
Best model saved on epoch 4

Epoch: 5: Train loss: 4.889; Val loss: 1.261;
Best model saved on epoch 5

Epoch: 6: Train loss: 4.49; Val loss: 1.188;
Best model saved on epoch 6

Epoch: 7: Train loss: 4.085; Val loss: 1.111;
Best model saved on epoch 7

Epoch: 8: Train loss: 3.723; Val loss: 1.063;
Best model saved on epoch 8

Epoch: 9: Train loss: 3.547; Val loss: 1.051;
Best model saved on epoch 9

Epoch: 10: Train loss: 3.394; Val loss: 1.125;
Best model saved on epoch 9

Epoch: 11: Train loss: 3.269; Val loss: 1.028;
Best model saved on epoch 11

Epoch: 12: Train loss: 3.106; Val loss: 0.996;
Best model saved on epoch 12

Epoch: 13: Train loss:

In [98]:
# experiment.end()

[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/kmisterios/tweakle-gaze-calibration/ae312d8459694c74a06aa0396db10532
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     best_model_epoch [76] : (0, 37)
[1;38;5;39mCOMET INFO:[0m     epoch [76]            : (0, 75)
[1;38;5;39mCOMET INFO:[0m     lr [76]               : (6.25e-05, 0.001)
[1;38;5;39mCOMET INFO:[0m     rmse_train [76]       : (0.39864196162766197, 5.732271712219478)
[1;38;5;39mCOMET INFO:[0m     rmse_val [76]         : (0.9365042474996205, 2.0441348549319875)
[1;38;5;39mCOMET

In [49]:
model = FineTuneModel(pretrained_model_face, pretrained_model_eyes, screen_features=False).to(device)
model.load_state_dict(torch.load(os.path.join(CHECKPOINTS_PATH, f"best_{EXPERIMENT_NAME}.pt")))
# model.load_state_dict(torch.load(os.path.join(CHECKPOINTS_PATH, f"best_raw_images.pt")))
criterion = RMSELoss(weights = LOSS_WEIGHTS)
model.eval()

FineTuneModel(
  (face_model): FaceModel(
    (backbone): Sequential(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2))
      (1): PReLU(num_parameters=16)
      (2): FaceMeshBlock(
        (convs): Sequential(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16)
          (1): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1))
        )
        (act): PReLU(num_parameters=16)
      )
      (3): FaceMeshBlock(
        (convs): Sequential(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16)
          (1): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1))
        )
        (act): PReLU(num_parameters=16)
      )
      (4): FaceMeshBlock(
        (max_pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (convs): Sequential(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), groups=16)
          (1): Conv2d(16, 32, kernel_size=(1, 1), stride=(1, 1))
   

In [50]:
dataset_test = GazeDetectionDataset(data = test_df, transform_list=trans_list, to_tensors=True, device=device, screen_features=False)
dataloader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE,
                        shuffle=False, num_workers=0)

Fusing layers... 


In [51]:
from tqdm import tqdm

preds = []
labels_list = []
losses = 0
for i, data in tqdm(enumerate(dataloader_test), total = len(dataloader_test)):
    inputs, labels, inputs_eye_l, inputs_eye_r, inputs_mask = data['image'], data['coordinates'], \
                                                     data['eye_l'], data['eye_r'], data['face_mask']
    with torch.no_grad():
        outputs = model(inputs, inputs_eye_l, inputs_eye_r, inputs_mask)
    loss = criterion(outputs, labels)
    losses += loss.detach().cpu().item()
    pred = outputs.cpu().numpy()
    preds.append(pred)
    labels_list.append(labels.cpu().numpy())

print(f"Test loss: {round(losses / (i + 1), 3)}")

100%|█████████████████████████████████████████████| 1/1 [00:02<00:00,  2.88s/it]

Test loss: 0.759





In [52]:
print(f"Test loss: {round(losses / (i + 1), 3)}")

Test loss: 0.759


In [53]:
preds = np.vstack(preds)
labels = np.vstack(labels_list)

mape_value = mape(labels, preds)
print(f"Test MAPE: {mape_value}")

test_df_copy = test_df.copy()

test_df_copy["pred_x"] = preds.T[0]
test_df_copy["pred_y"] = preds.T[1]

test_df_copy[['x_normalized', 'y_normalized', 'pred_x', 'pred_y']].tail(40)

Test MAPE: 0.44033342599868774


Unnamed: 0,x_normalized,y_normalized,pred_x,pred_y
411,0.518998,0.077632,0.552534,0.287454
94,0.369038,0.068421,0.340619,0.136466
70,0.228981,0.452632,0.351523,0.533227
415,0.925222,0.744737,0.739823,0.697012
388,0.764349,0.615132,0.682541,0.522663
220,0.183508,0.762061,0.744516,0.613828
39,0.483023,0.279605,0.430724,0.390882
55,0.689706,0.991009,0.719439,0.833414
349,0.538399,0.55625,0.404731,0.48175
231,0.856912,0.565132,0.85009,0.496904


In [54]:
dataset_test = GazeDetectionDataset(data = test_diff_person, transform_list=trans_list, to_tensors=True, device=device, screen_features=False)
dataloader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE,
                        shuffle=False, num_workers=0)

Fusing layers... 


In [55]:
from tqdm import tqdm

preds = []
labels_list = []
losses = 0
for i, data in tqdm(enumerate(dataloader_test), total = len(dataloader_test)):
    inputs, labels, inputs_eye_l, inputs_eye_r, inputs_mask = data['image'], data['coordinates'], \
                                                     data['eye_l'], data['eye_r'], data['face_mask']
    with torch.no_grad():
        outputs = model(inputs, inputs_eye_l, inputs_eye_r, inputs_mask)
    loss = criterion(outputs, labels)
    losses += loss.detach().cpu().item()
    pred = outputs.cpu().numpy()
    preds.append(pred)
    labels_list.append(labels.cpu().numpy())

print(f"Test loss: {round(losses / (i + 1), 3)}")

100%|█████████████████████████████████████████████| 1/1 [00:09<00:00,  9.09s/it]

Test loss: 2.282





In [56]:
preds = np.vstack(preds)
labels = np.vstack(labels_list)

mape_value = mape(labels, preds)
print(f"Test MAPE: {mape_value}")

test_df_copy = test_diff_person.copy()

test_df_copy["pred_x"] = preds.T[0]
test_df_copy["pred_y"] = preds.T[1]

test_df_copy[['x_normalized', 'y_normalized', 'pred_x', 'pred_y']].tail(40)

Test MAPE: 1.313175082206726


Unnamed: 0,x_normalized,y_normalized,pred_x,pred_y
93,0.436944,0.726974,0.363734,0.729092
94,0.789814,0.639474,0.609427,0.580578
95,0.402587,0.583553,0.422278,0.841643
96,0.717057,0.491447,0.552626,0.762084
97,0.611156,0.456579,0.500313,0.77837
98,0.886419,0.826974,0.614491,0.768081
99,0.864996,0.667105,0.816038,0.786971
100,0.228375,0.943421,0.377791,0.8716
101,0.446241,0.903289,0.481705,0.827559
102,0.260307,0.111184,0.269105,0.256264


In [57]:
test_df_copy

Unnamed: 0,paths,timestamp,x_gt,y_gt,x1,...,x_normalized,y_normalized,participant_name,pred_x,pred_y
0,/home/ubuntu/projects/tweakle/gaze_detection/r...,1697650382.32567,1845,367,1765,...,0.745756,0.241447,marina,0.707897,0.243687
1,/home/ubuntu/projects/tweakle/gaze_detection/r...,1697650347.0760791,2463,1358,2383,...,0.995554,0.893421,marina,0.745164,0.828400
2,/home/ubuntu/projects/tweakle/gaze_detection/r...,1697650325.385819,854,1341,774,...,0.345190,0.882237,marina,0.416604,0.795334
3,/home/ubuntu/projects/tweakle/gaze_detection/r...,1697650354.374967,2209,1125,2129,...,0.892886,0.740132,marina,0.716653,0.813668
4,/home/ubuntu/projects/tweakle/gaze_detection/r...,1697650374.0317252,2221,664,2141,...,0.897736,0.436842,marina,0.686274,0.673402
...,...,...,...,...,...,...,...,...,...,...,...
128,/home/ubuntu/projects/tweakle/gaze_detection/r...,1697650374.6311529,2251,628,2171,...,0.909863,0.413158,marina,0.691270,0.630502
129,/home/ubuntu/projects/tweakle/gaze_detection/r...,1697650305.658786,1035,690,955,...,0.418351,0.453947,marina,0.512416,0.754501
130,/home/ubuntu/projects/tweakle/gaze_detection/r...,1697650336.078906,1807,1282,1727,...,0.730396,0.843421,marina,0.718043,0.853024
131,/home/ubuntu/projects/tweakle/gaze_detection/r...,1697650392.957037,2086,105,2006,...,0.843169,0.069079,marina,0.844821,0.287019
