In [1]:
import os
from pathlib import Path
import pandas as pd

import torch
from models.gaze_model import FineTuneModel
from dataset.dataset import GazeDetectionDataset
from facemesh import FaceMesh
from pupil_detection import IrisLM
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import mean_absolute_percentage_error as mape
import numpy as np

In [2]:
def train(model: FineTuneModel, dataloader_train: DataLoader):
    """Run one training epoch and return ``(mean_batch_loss, mape)``.

    Uses the notebook-level globals ``optimizer`` and ``criterion``; they must
    be defined before this function is called.

    Args:
        model: network to optimise; it is switched to train mode here.
        dataloader_train: yields dict batches with the keys ``'image'``,
            ``'coordinates'``, ``'eye_l'``, ``'eye_r'`` and ``'face_mask'``.

    Returns:
        Tuple of the per-batch loss averaged over the epoch and the MAPE
        between all collected labels and predictions.

    Raises:
        ValueError: if ``dataloader_train`` yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    preds_list = []
    labels_list = []
    for data in dataloader_train:
        inputs, labels = data['image'], data['coordinates']
        inputs_eye_l, inputs_eye_r = data['eye_l'], data['eye_r']
        inputs_mask = data['face_mask']

        optimizer.zero_grad()

        outputs = model(inputs, inputs_eye_l, inputs_eye_r, inputs_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        # Cap the global gradient norm at 1 before the optimizer step.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
        optimizer.step()

        preds_list.extend(outputs.detach().cpu().tolist())
        labels_list.extend(labels.detach().cpu().tolist())
        total_loss += loss.item()
        num_batches += 1

    # Count batches explicitly instead of reusing the loop index, which is
    # undefined when the loader is empty.
    if num_batches == 0:
        raise ValueError("dataloader_train yielded no batches; cannot compute epoch metrics")

    mean_loss = total_loss / num_batches
    mape_value = mape(labels_list, preds_list)
    return mean_loss, mape_value

In [3]:
def eval(model: FineTuneModel, dataloader_val: DataLoader):
    """Evaluate ``model`` on a loader and return ``(mean_batch_loss, mape)``.

    NOTE: the name shadows the built-in ``eval``; it is kept because other
    cells call it under this name. Uses the notebook-level global
    ``criterion``.

    Args:
        model: network to evaluate; it is switched to eval mode here.
        dataloader_val: yields dict batches with the keys ``'image'``,
            ``'coordinates'``, ``'eye_l'``, ``'eye_r'`` and ``'face_mask'``.

    Returns:
        Tuple of the per-batch loss averaged over the loader and the MAPE
        between all collected labels and predictions.

    Raises:
        ValueError: if ``dataloader_val`` yields no batches.
    """
    model.eval()
    total_loss = 0.0
    num_batches = 0
    preds_list = []
    labels_list = []
    for data in dataloader_val:
        inputs, labels = data['image'], data['coordinates']
        inputs_eye_l, inputs_eye_r = data['eye_l'], data['eye_r']
        inputs_mask = data['face_mask']

        # Neither the forward pass nor the loss needs autograd bookkeeping.
        with torch.no_grad():
            outputs = model(inputs, inputs_eye_l, inputs_eye_r, inputs_mask)
            loss = criterion(outputs, labels)

        total_loss += loss.item()
        preds_list.extend(outputs.cpu().tolist())
        labels_list.extend(labels.cpu().tolist())
        num_batches += 1

    # Count batches explicitly instead of reusing the loop index, which is
    # undefined when the loader is empty.
    if num_batches == 0:
        raise ValueError("dataloader_val yielded no batches; cannot compute evaluation metrics")

    mean_loss = total_loss / num_batches
    mape_value = mape(labels_list, preds_list)
    return mean_loss, mape_value

In [4]:
class RMSELoss(nn.Module):
    """Root-mean-squared-error loss: ``sqrt(MSE(yhat, y) + eps)``.

    ``eps`` is added inside the square root (presumably to keep the gradient
    finite when the MSE is exactly zero).
    """

    def __init__(self, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.mse = nn.MSELoss()

    def forward(self, yhat, y):
        """Return the epsilon-stabilised RMSE between ``yhat`` and ``y``."""
        mean_squared_error = self.mse(yhat, y)
        return (mean_squared_error + self.eps).sqrt()

In [5]:
# Use the first CUDA device when one is available; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

In [6]:
# Index every PNG frame in the training folder.
frames_folder = "./real_experiment/frames_train/"
# Path.glob yields entries in filesystem order, which is not guaranteed to be
# stable. Sort so that row order, and therefore the seeded train/val/test
# splits built from it, is reproducible across machines and runs.
p = sorted(Path(frames_folder).glob('*.png'))
paths = [str(path.absolute()) for path in p]
df_files = pd.DataFrame({"paths": paths})
# "ind" is the file name without its extension; it is the join key against
# the label file's timestamp column.
df_files["ind"] = df_files["paths"].map(lambda x: Path(x).stem)

In [7]:
# Load the gaze labels (space-separated, no header row).
# The first column is read as a *string*: it is used as a join key against
# the frame file stems, and a float round-trip (parse, then str()) can alter
# the text (e.g. trailing zeros or parser rounding) and silently drop rows
# from the merge.
df = pd.read_csv(
    "./real_experiment/points_train.txt",
    sep=" ",
    header=None,
    dtype={0: str},
)
cols = [
    "timestamp", "x_gt", "y_gt", "x1", "y1",
    "x2", "y2", "screen_w", "screen_h"
]
df.columns = cols

# Express the ground-truth point as a fraction of the screen size.
df["x_normalized"] = df["x_gt"] / df["screen_w"]
df["y_normalized"] = df["y_gt"] / df["screen_h"]
df.head()

Unnamed: 0,timestamp,x_gt,y_gt,x1,y1,...,y2,screen_w,screen_h,x_normalized,y_normalized
0,1697215062.4614332,2437,34,2357,0,...,114,2474,1520,0.985044,0.022368
1,1697215063.1626291,2437,34,2357,0,...,114,2474,1520,0.985044,0.022368
2,1697215063.8596458,2437,34,2357,0,...,114,2474,1520,0.985044,0.022368
3,1697215064.5608058,2437,34,2357,0,...,114,2474,1520,0.985044,0.022368
4,1697215065.157571,2437,34,2357,0,...,114,2474,1520,0.985044,0.022368


In [8]:
# Attach the labels to the frame paths by matching each file stem ("ind")
# with the label timestamp. The default inner join keeps only matched rows;
# the helper key column is dropped afterwards.
full_df = pd.merge(
    df_files, df, left_on="ind", right_on="timestamp"
).drop(columns="ind")

In [9]:
# Sanity check: (number of frames that matched a label row, number of columns).
full_df.shape

(112, 12)

In [71]:
# --- Experiment configuration ---
SEED = 42                 # seeds the torch / numpy global RNGs (see below)
NUM_SAMPLES = None        # rows taken from full_df via .head(); None keeps all
BATCH_SIZE = 128
LEARNING_RATE = 5e-3
REDUCE_FACTOR = 0.5       # `factor` passed to ReduceLROnPlateau
PATIENCE = 5              # `patience` passed to ReduceLROnPlateau
NUM_EPOCHS = 40
WEIGHT_DECAY = 1e-4
CHECKPOINTS_PATH = "./checkpoints/"
EXPERIMENT_NAME = "calibration_last_last"

# The training DataLoader shuffles, so seed the global RNGs to make a
# Restart & Run All reproduce the same batch order.
torch.manual_seed(SEED)
np.random.seed(SEED)

In [72]:
# Two-stage split with a fixed seed: first hold out 5% of the (optionally
# truncated) data as the test set, then carve 10% of the remainder off as the
# validation set.
trainval_df, test_df = train_test_split(
    full_df.head(NUM_SAMPLES),
    test_size=0.05,
    random_state=42,
    shuffle=True,
)
train_df, val_df = train_test_split(
    trainval_df,
    test_size=0.1,
    random_state=42,
    shuffle=True,
)

In [73]:
# Shared preprocessing: every image is resized to 192x192.
trans_list = [A.Resize(192, 192)]

dataset_train = GazeDetectionDataset(data = train_df, transform_list=trans_list,
                                     to_tensors=True, device=device, screen_features=False)
dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE,
                        shuffle=True, num_workers=0)

dataset_val = GazeDetectionDataset(data = val_df, transform_list=trans_list,
                                   to_tensors=True, device=device, screen_features=False)
# Validation needs no shuffling: a fixed order keeps the per-batch averaged
# loss deterministic between epochs and matches the test loader.
dataloader_val = DataLoader(dataset_val, batch_size=BATCH_SIZE,
                        shuffle=False, num_workers=0)

Fusing layers... 
Fusing layers... 


In [74]:
from comet_ml import Experiment
from comet_ml.integration.pytorch import log_model

# SECURITY: never commit credentials to a notebook. The API key is read from
# the COMET_API_KEY environment variable; when it is unset, None is passed and
# Comet falls back to its own configuration lookup.
# The key that used to be hard-coded here must be treated as leaked and
# revoked in the Comet account settings.
experiment = Experiment(
  api_key=os.environ.get("COMET_API_KEY"),
  project_name="tweakle-gaze-calibration",
  workspace="kmisterios"
)

[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/kmisterios/tweakle-gaze-calibration/feabd5cb7d2f4013948936827835b1d3



In [75]:
# EXPERIMENT_NAME is already a string, so no f-string wrapper is needed.
experiment.set_name(EXPERIMENT_NAME)

In [76]:
# Pretrained face-mesh backbone; weights are loaded through the project's own helper.
pretrained_model_face = FaceMesh()
pretrained_model_face.load_weights("./weights/facemesh.pth")

# Pretrained iris-landmark backbone.
# map_location="cpu" lets the checkpoint load on CPU-only machines even if it
# was saved from a GPU; the combined model is moved to `device` when the
# FineTuneModel is built.
model_path = "./weights/irislandmarks.pth"
pretrained_model_eyes = IrisLM()
weights = torch.load(model_path, map_location="cpu")
pretrained_model_eyes.load_state_dict(weights)

<All keys matched successfully>

In [77]:
# NOTE: this re-declares CHECKPOINTS_PATH, which the configuration cell set to
# "./checkpoints/" (with a trailing slash). Every visible use goes through
# os.path.join.
CHECKPOINTS_PATH = "./checkpoints"
# Name of the earlier experiment whose "best_<name>.pt" checkpoint is loaded
# as the starting point for fine-tuning.
EXPERIMENT_NAME_ORIG = "face_eyes_mask_correctedv2_balanced_model_lr_fixed_eval_patience"

In [78]:
# Build the fine-tuning model around the two pretrained backbones and
# initialise it from the best checkpoint of the original experiment.
model = FineTuneModel(pretrained_model_face, pretrained_model_eyes, screen_features=False).to(device)
base_checkpoint = os.path.join(CHECKPOINTS_PATH, f"best_{EXPERIMENT_NAME_ORIG}.pt")
# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved on a GPU also loads on a CPU-only machine.
model.load_state_dict(torch.load(base_checkpoint, map_location=device))
model.train()

FineTuneModel(
  (face_model): FaceModel(
    (backbone): Sequential(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2))
      (1): PReLU(num_parameters=16)
      (2): FaceMeshBlock(
        (convs): Sequential(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16)
          (1): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1))
        )
        (act): PReLU(num_parameters=16)
      )
      (3): FaceMeshBlock(
        (convs): Sequential(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16)
          (1): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1))
        )
        (act): PReLU(num_parameters=16)
      )
      (4): FaceMeshBlock(
        (max_pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (convs): Sequential(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), groups=16)
          (1): Conv2d(16, 32, kernel_size=(1, 1), stride=(1, 1))
   

In [79]:
# Freeze the whole network first ...
for frozen_param in model.parameters():
    frozen_param.requires_grad_(False)

# ... then re-enable gradients for the `fc` head only, so it is the sole
# trainable part.
for head_param in model.fc.parameters():
    head_param.requires_grad_(True)

In [80]:
# Loss, optimizer (over the `fc` head parameters only) and an LR scheduler
# that is stepped with the validation loss in the training loop.
criterion = RMSELoss()
optimizer = optim.Adam(
    model.fc.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)
scheduler = ReduceLROnPlateau(
    optimizer,
    factor=REDUCE_FACTOR,
    patience=PATIENCE,
)

In [81]:
# Fine-tuning loop: train, validate, step the scheduler on the validation
# loss, and keep the checkpoint with the lowest validation loss seen so far.
val_loss_min = np.inf
epoch_save = None
best_checkpoint_path = os.path.join(CHECKPOINTS_PATH, f"best_{EXPERIMENT_NAME}.pt")

try:
    for epoch in range(NUM_EPOCHS):
        train_loss, train_mape = train(model, dataloader_train)
        val_loss, val_mape = eval(model, dataloader_val)
        scheduler.step(val_loss)
        current_lr = optimizer.param_groups[0]['lr']
        if val_loss_min > val_loss:
            val_loss_min = val_loss
            torch.save(model.state_dict(), best_checkpoint_path)
            epoch_save = epoch

        print()
        print(f'Epoch: {epoch}: Train loss: {round(train_loss, 3)}; Val loss: {round(val_loss, 3)};')
        if epoch_save is not None:
            print(f'Best model saved on epoch {epoch_save}')
        # Pass the epoch explicitly so Comet can plot the metrics against it.
        experiment.log_metrics({
            "rmse_val": val_loss,
            "rmse_train": train_loss,
            "best_model_epoch": epoch_save,
            "train_mape": train_mape,
            "val_mape": val_mape,
            "epoch": epoch,
            "lr": current_lr
        }, epoch=epoch)
finally:
    # Always close the Comet experiment, even if training raises or the
    # cell is interrupted, so it is not left live.
    experiment.end()


Epoch: 0: Train loss: 0.259; Val loss: 0.381;
Best model saved on epoch 0

Epoch: 1: Train loss: 0.328; Val loss: 0.299;
Best model saved on epoch 1

Epoch: 2: Train loss: 0.256; Val loss: 0.188;
Best model saved on epoch 2

Epoch: 3: Train loss: 0.201; Val loss: 0.185;
Best model saved on epoch 3

Epoch: 4: Train loss: 0.222; Val loss: 0.174;
Best model saved on epoch 4

Epoch: 5: Train loss: 0.196; Val loss: 0.177;
Best model saved on epoch 4

Epoch: 6: Train loss: 0.165; Val loss: 0.208;
Best model saved on epoch 4

Epoch: 7: Train loss: 0.176; Val loss: 0.192;
Best model saved on epoch 4

Epoch: 8: Train loss: 0.161; Val loss: 0.157;
Best model saved on epoch 8

Epoch: 9: Train loss: 0.145; Val loss: 0.155;
Best model saved on epoch 9

Epoch: 10: Train loss: 0.152; Val loss: 0.151;
Best model saved on epoch 10

Epoch: 11: Train loss: 0.138; Val loss: 0.172;
Best model saved on epoch 10

Epoch: 12: Train loss: 0.137; Val loss: 0.179;
Best model saved on epoch 10

Epoch: 13: Train l

[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/kmisterios/tweakle-gaze-calibration/feabd5cb7d2f4013948936827835b1d3
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     best_model_epoch [40] : (0, 35)
[1;38;5;39mCOMET INFO:[0m     epoch [40]            : (0, 39)
[1;38;5;39mCOMET INFO:[0m     lr                    : 0.005
[1;38;5;39mCOMET INFO:[0m     rmse_train [40]       : (0.08254006505012512, 0.3284291625022888)
[1;38;5;39mCOMET INFO:[0m     rmse_val [40]         : (0.11099404096603394, 0.3809197247028351)
[1;38;5;39mCOMET INFO:[0m


Epoch: 39: Train loss: 0.083; Val loss: 0.116;
Best model saved on epoch 35


[1;38;5;39mCOMET INFO:[0m Uploading 1 metrics, params and output messages


In [82]:
# NOTE(review): the training cell already calls experiment.end() after its
# loop, so this second call looks redundant — confirm before removing.
experiment.end()

In [83]:
# Rebuild the model and load the best fine-tuned checkpoint for evaluation.
model = FineTuneModel(pretrained_model_face, pretrained_model_eyes, screen_features=False).to(device)
best_checkpoint = os.path.join(CHECKPOINTS_PATH, f"best_{EXPERIMENT_NAME}.pt")
# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved on a GPU also loads on a CPU-only machine.
model.load_state_dict(torch.load(best_checkpoint, map_location=device))
criterion = RMSELoss()
model.eval()

FineTuneModel(
  (face_model): FaceModel(
    (backbone): Sequential(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2))
      (1): PReLU(num_parameters=16)
      (2): FaceMeshBlock(
        (convs): Sequential(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16)
          (1): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1))
        )
        (act): PReLU(num_parameters=16)
      )
      (3): FaceMeshBlock(
        (convs): Sequential(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16)
          (1): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1))
        )
        (act): PReLU(num_parameters=16)
      )
      (4): FaceMeshBlock(
        (max_pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (convs): Sequential(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), groups=16)
          (1): Conv2d(16, 32, kernel_size=(1, 1), stride=(1, 1))
   

In [84]:
# Held-out test set: same preprocessing as train/val, iterated in fixed order.
dataset_test = GazeDetectionDataset(
    data=test_df,
    transform_list=trans_list,
    to_tensors=True,
    device=device,
    screen_features=False,
)
dataloader_test = DataLoader(
    dataset_test,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
)

Fusing layers... 


In [85]:
from tqdm import tqdm

# Run the fine-tuned model over the test loader, collecting per-batch
# predictions and labels and accumulating the per-batch loss.
preds, labels_list = [], []
losses = 0
test_batches = tqdm(enumerate(dataloader_test), total = len(dataloader_test))
for i, data in test_batches:
    inputs = data['image']
    labels = data['coordinates']
    inputs_eye_l, inputs_eye_r = data['eye_l'], data['eye_r']
    inputs_mask = data['face_mask']

    with torch.no_grad():
        outputs = model(inputs, inputs_eye_l, inputs_eye_r, inputs_mask)

    loss = criterion(outputs, labels)
    losses += loss.detach().cpu().item()

    pred = outputs.cpu().numpy()
    preds.append(pred)
    labels_list.append(labels.cpu().numpy())

# Mean of the per-batch losses (i is the index of the last batch).
print(f"Test loss: {round(losses / (i + 1), 3)}")

100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  2.03it/s]

Test loss: 0.136





In [86]:
# Stack the per-batch arrays into single (n_samples, n_outputs) arrays.
preds = np.vstack(preds)
labels = np.vstack(labels_list)

mape_value = mape(labels, preds)
print(f"Test MAPE: {mape_value}")

# Put the predictions next to the ground truth on a copy of the test frame;
# column 0 of the prediction array goes to pred_x and column 1 to pred_y.
test_df_copy = test_df.assign(pred_x=preds[:, 0], pred_y=preds[:, 1])

test_df_copy.loc[:, ['x_normalized', 'y_normalized', 'pred_x', 'pred_y']].tail(40)

Test MAPE: 0.4376121759414673


Unnamed: 0,x_normalized,y_normalized,pred_x,pred_y
40,0.618027,0.957237,0.598048,0.711305
65,0.648343,0.482237,0.632132,0.506011
4,0.792643,0.958553,0.878069,0.660725
47,0.0481,0.049342,0.119459,0.169695
42,0.702506,0.484868,0.757945,0.53987
69,0.563864,0.641447,0.370511,0.648744
