# **FATIGUE NET**

- Imports
- Definition of custom dataset
- Definition of our FatigueNet model class
- Training and testing part

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import itertools
import os

from torch.utils.data import DataLoader, random_split, Dataset
import pandas as pd

from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Custom Datasets

In [2]:
class CatapultDataset(Dataset):
    """Dataset serving one Catapult recording and its target per item.

    Each element of ``dataset_list`` is indexed as ``entry[0]`` (an object
    exposing ``.values``, e.g. a pandas DataFrame) and ``entry[1]`` (a
    sequence whose last element is the regression target).
    """

    def __init__(self, dataset_list):
        # Two separate passes, mirroring how the inputs are paired elsewhere.
        self.catapult = [entry[0] for entry in dataset_list]
        self.targets = [entry[1][-1] for entry in dataset_list]

    def __len__(self):
        return len(self.catapult)

    def __getitem__(self, idx):
        """Return ``(features, target)`` as float tensors for item ``idx``."""
        recording = self.catapult[idx]
        features = recording.values.astype(np.float64)
        label = [self.targets[idx]]

        return torch.Tensor(features), torch.Tensor(label)
    
class JMDataset(Dataset):
    """Dataset serving one JM feature vector and its target per item.

    Each element of ``dataset_list`` is indexed as ``entry[1]``: a sequence
    whose last element is the regression target and whose preceding
    elements are the JM features.

    The input sequences are NOT modified: the target is read with ``[-1]``
    and the features are sliced with ``[:-1]``, so the same ``dataset_list``
    can safely be shared with other datasets, in any construction order.
    """

    def __init__(self, dataset_list):
        jm_rows = [entry[1] for entry in dataset_list]
        self.targets = [row[-1] for row in jm_rows]
        # Copy the features without the trailing target (no in-place pop).
        self.jm = np.array([row[:-1] for row in jm_rows])

    def __len__(self):
        return len(self.jm)

    def __getitem__(self, idx):
        """Return ``(features, target)`` as float tensors for item ``idx``."""
        input_sample = torch.Tensor(self.jm[idx].astype(np.float64))
        target_sample = torch.Tensor([self.targets[idx]])

        return input_sample, target_sample

# FatigueNet model

In [5]:
class FatigueNet(nn.Module):
    """1-D CNN + GRU regressor combining a Catapult sequence with JM features.

    The Catapult sequence goes through six convolutional blocks (16 to 512
    channels) interleaved with average pooling and dropout, then a 3-layer
    GRU. The last GRU output (256 values) is concatenated with the JM
    feature vector and fed to a fully connected head that outputs a single
    value per sample.

    Args:
        nb_features_catapult: number of channels of the Catapult input.
        nb_features_jm: length of the JM feature vector.
    """

    def __init__(self, nb_features_catapult, nb_features_jm):
        super().__init__()
        self.block1 = nn.Sequential(
            nn.Conv1d(in_channels=nb_features_catapult, out_channels=16, kernel_size=20, stride=1, padding=9),
            nn.ReLU(),
            nn.Conv1d(in_channels=16, out_channels=16, kernel_size=20, stride=1, padding=9),
            nn.ReLU()
        )

        self.block2 = nn.Sequential(
            nn.AvgPool1d(kernel_size=4, stride=4, padding=2),
            nn.Conv1d(16, 32, kernel_size=15, stride=1, padding=7),
            nn.ReLU(),
            nn.Conv1d(32, 32, kernel_size=15, stride=1, padding=7),
            nn.ReLU()
        )

        self.block3 = nn.Sequential(
            nn.AvgPool1d(kernel_size=4, stride=4, padding=2),
            nn.Conv1d(32, 64, kernel_size=10, stride=1, padding=5),
            nn.ReLU(),
            nn.Conv1d(64, 64, kernel_size=10, stride=1, padding=4),
            nn.ReLU()
        )

        self.block4 = nn.Sequential(
            nn.AvgPool1d(kernel_size=4, stride=4, padding=2),
            nn.Conv1d(64, 128, kernel_size=10, stride=1, padding=5),
            nn.ReLU(),
            nn.Conv1d(128, 128, kernel_size=10, stride=1, padding=4),
            nn.ReLU()
        )

        self.block5 = nn.Sequential(
            nn.AvgPool1d(kernel_size=2, stride=2, padding=1),
            nn.Dropout(p=0.2),
            nn.Conv1d(128, 256, kernel_size=10, stride=1, padding=5),
            nn.ReLU(),
            nn.Conv1d(256, 256, kernel_size=10, stride=1, padding=4),
            nn.ReLU()
        )

        self.block6 = nn.Sequential(
            nn.AvgPool1d(kernel_size=2, stride=2, padding=1),
            nn.Dropout(p=0.2),
            nn.Conv1d(256, 512, kernel_size=10, stride=1, padding=4),
            nn.ReLU(),
            nn.Conv1d(512, 512, kernel_size=10, stride=1, padding=4),
            nn.ReLU()
        )

        self.block7 = nn.Sequential(
            nn.AvgPool1d(kernel_size=2, stride=2, padding=1),
            nn.Dropout(p=0.2)
        )

        self.gru = nn.GRU(512, 256, num_layers=3, batch_first=True)

        # Not used by forward(); kept so the parameter set (and therefore
        # saved state dicts and seeded initialisation) stays unchanged.
        self.fc1 = nn.Linear(256, 1)

        # Fully connected head applied to [GRU output, JM features]
        self.block8 = nn.Sequential(
            nn.Linear(256 + nb_features_jm, 256 + nb_features_jm),
            nn.ReLU(),
            nn.Linear(256 + nb_features_jm, 256 + nb_features_jm),
            nn.ReLU(),
            nn.Linear(256 + nb_features_jm, 256 + nb_features_jm),
            nn.ReLU(),
            nn.Linear(256 + nb_features_jm, 1)
        )

    def forward(self, x1, x2):
        """Compute one prediction per sample.

        Args:
            x1: Catapult batch of shape (batch, time, nb_features_catapult).
            x2: JM batch of shape (batch, nb_features_jm).

        Returns:
            Tensor of shape (batch, 1).
        """
        # Conv1d / AvgPool1d expect (batch, channels, time)
        x1 = x1.permute(0, 2, 1)

        # Smooth and downsample with an integer stride of time // 4000.
        # Clamp to 1: sequences shorter than 4000 steps would otherwise
        # produce a stride of 0, which is invalid.
        stride = max(1, x1.shape[2] // 4000)
        x1 = nn.functional.avg_pool1d(x1, kernel_size=50, stride=stride, padding=25)

        x1 = self.block1(x1)
        x1 = self.block2(x1)
        x1 = self.block3(x1)
        x1 = self.block4(x1)
        x1 = self.block5(x1)
        x1 = self.block6(x1)
        x1 = self.block7(x1)

        # GRU is batch_first: back to (batch, time, channels)
        x1 = x1.permute(0, 2, 1)
        x1, _ = self.gru(x1)

        # Keep only the output of the last time step
        x1 = x1[:, -1, :]

        # Concatenate with the JM features and apply the dense head
        x = torch.cat((x1, x2), dim=1)
        x = self.block8(x)

        return x

    def train_with_gradient_accumulation(self, train_loader, train_jm_loader, num_epochs, accumulation_steps, device, show_log=True):
        """Train with Adam (lr=0.001) on an L1 loss, accumulating gradients.

        Args:
            train_loader: iterable of (catapult_inputs, targets) batches.
            train_jm_loader: iterable of (jm_inputs, targets) batches, aligned
                batch-for-batch with ``train_loader``.
            num_epochs: number of passes over the loaders.
            accumulation_steps: number of batches whose gradients are summed
                before each optimizer step (must be >= 1).
            device: device the model and the batches are moved to.
            show_log: print the mean batch MAE after each epoch when True.

        Raises:
            ValueError: if ``accumulation_steps`` is smaller than 1.
        """
        if accumulation_steps < 1:
            raise ValueError("accumulation_steps must be >= 1")

        num_batches = len(train_loader)
        assert len(train_jm_loader) == num_batches, \
            "Catapult and JM loaders must have the same number of batches"

        # Move the model before building the optimizer so that the optimizer
        # is created on the parameters it will actually update.
        self.to(device)
        optimizer = optim.Adam(self.parameters(), lr=0.001)
        criterion = nn.L1Loss()

        for epoch in range(num_epochs):
            running_loss = 0.0
            self.train()  # Put the model in training mode
            optimizer.zero_grad()  # Never start an epoch with stale gradients

            combined_loader = zip(train_jm_loader, train_loader)
            for i, ((inputs_jm, _), (inputs, targets)) in enumerate(combined_loader):
                inputs_jm = inputs_jm.to(device)
                inputs = inputs.to(device)
                targets = targets.to(device)

                # Forward pass
                outputs = self(inputs, inputs_jm)
                loss = criterion(outputs, targets)

                # Log the unscaled loss so the reported MAE is the real one
                running_loss += loss.item()

                # Scale only the gradient contribution of this batch
                (loss / accumulation_steps).backward()

                # Step every `accumulation_steps` batches and on the last batch
                if (i + 1) % accumulation_steps == 0 or (i + 1) == num_batches:
                    optimizer.step()
                    optimizer.zero_grad()

            if show_log:
                epoch_mae = running_loss / num_batches if num_batches else float("nan")
                print(f'Epoch {epoch + 1}, MAE: {epoch_mae}')

        if show_log:
            print('Training Done')

    def predict(self, test_loader, test_jm_loader, device):
        """Run the model in evaluation mode over aligned loaders.

        Args:
            test_loader: iterable of (catapult_inputs, targets) batches.
            test_jm_loader: iterable of (jm_inputs, targets) batches, aligned
                batch-for-batch with ``test_loader``.
            device: device the model and the batches are moved to.

        Returns:
            Tuple ``(all_predictions, all_targets)``: two lists holding one
            NumPy entry per sample, in loader order.
        """
        assert len(test_jm_loader) == len(test_loader), \
            "Catapult and JM loaders must have the same number of batches"

        self.to(device)
        self.eval()
        all_predictions = []
        all_targets = []

        with torch.no_grad():
            for (inputs_jm, _), (inputs, targets) in zip(test_jm_loader, test_loader):
                inputs = inputs.to(device)
                inputs_jm = inputs_jm.to(device)

                # Forward pass
                outputs = self(inputs, inputs_jm)

                all_predictions.extend(outputs.cpu().numpy())
                all_targets.extend(targets.cpu().numpy())

        return all_predictions, all_targets

    def evaluate_model(self, test_loader, test_jm_loader, device, show_cm=False):
        """Return the mean absolute error over every test sample.

        Args:
            test_loader: iterable of (catapult_inputs, targets) batches.
            test_jm_loader: iterable of (jm_inputs, targets) batches.
            device: device the model and the batches are moved to.
            show_cm: when True, also plot a confusion matrix of the rounded
                predictions against the integer targets for classes 1..10.

        Returns:
            The MAE as a Python float, averaged per sample (not per batch).

        Raises:
            ValueError: if the loaders yield no sample.
        """
        all_predictions, all_targets = self.predict(test_loader, test_jm_loader, device)
        if len(all_predictions) == 0:
            raise ValueError("Cannot evaluate the model on an empty test loader")

        predictions = np.asarray(all_predictions, dtype=np.float64).reshape(-1)
        targets = np.asarray(all_targets, dtype=np.float64).reshape(-1)
        mae = float(np.mean(np.abs(predictions - targets)))

        if show_cm:
            num_classes = 10
            predictions_discretized = [int(round(float(value))) for value in predictions]
            targets_discretized = [int(value) for value in targets]

            cm = confusion_matrix(targets_discretized, predictions_discretized, labels=list(range(1, num_classes + 1)))

            sns.set(font_scale=1.2)
            plt.figure(figsize=(6, 6))
            sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False, square=True)
            plt.xlabel('Predicted Class')
            plt.ylabel('Actual Class')
            plt.title('Confusion Matrix')
            plt.show()

        return mae

# Training and Testing

In [4]:
def create_catapult_jm_loader(train_catapult_path, train_jm_path, player_id, batch_size, gnr, is_train=True):
    """Build aligned Catapult and JM data loaders for one player.

    Reads ``{train_catapult_path}{player_id}.csv`` and
    ``{train_jm_path}{player_id}.csv``, pairs every JM row with the Catapult
    rows of the same ``(n_date, session_id)`` session and wraps both sides
    in non-shuffled ``DataLoader`` objects so their batches stay aligned.

    Each JM row is expected to end with ``target, n_date, session_id``.
    JM rows whose session has no Catapult data are reported and skipped.

    Args:
        train_catapult_path: path prefix of the Catapult CSV files.
        train_jm_path: path prefix of the JM CSV files.
        player_id: file stem identifying the player.
        batch_size: batch size of both loaders.
        gnr: ``torch.Generator`` forwarded to both loaders.
        is_train: when True, the ``"Unnamed: 0"`` column is dropped from the
            Catapult data as well (it is always dropped from the JM data).

    Returns:
        Tuple ``(catapult_loader, jm_loader)``.
    """
    # NOTE(review): the default collate function stacks samples, so with
    # batch_size > 1 all sessions in a batch presumably need the same number
    # of Catapult rows - confirm against the data preparation step.

    # Get all catapult data per session for the player_id
    catapult_data = pd.read_csv(f"{train_catapult_path}{player_id}.csv")
    if is_train:
        catapult_data = catapult_data.drop("Unnamed: 0", axis=1)

    # Drop n_date and session_id once per session, while building the lookup:
    # several JM rows may reference the same session, and dropping the
    # columns in place per JM row would fail on the second reference.
    catapult_by_sessions_dict = {
        session: df.drop(['n_date', 'session_id'], axis=1)
        for session, df in catapult_data.groupby(['n_date', 'session_id'])
    }

    # Get all JM data per session for the player_id
    jm = pd.read_csv(f"{train_jm_path}{player_id}.csv").drop("Unnamed: 0", axis=1)
    print(len(jm.columns) - 3)

    # Create a list of [catapult data, JM features + target] per session
    dataset = []
    for session_jm in jm.values.tolist():
        session_key = (session_jm[-2], session_jm[-1])
        try:
            session_catapult = catapult_by_sessions_dict[session_key]
        except KeyError:
            print("Session not found:", session_key)
            continue
        dataset.append([session_catapult, session_jm[:-2]])

    # Create custom datasets
    catapult_dataset = CatapultDataset(dataset)
    jm_dataset = JMDataset(dataset)

    # Create dataset loaders (no shuffling, so both loaders stay aligned)
    train_catapult_loader = DataLoader(catapult_dataset, batch_size=batch_size, shuffle=False, generator=gnr)
    train_jm_loader = DataLoader(jm_dataset, batch_size=batch_size, shuffle=False, generator=gnr)
    return train_catapult_loader, train_jm_loader

In [None]:
# Defining hyper-parameters
# Use the GPU when one is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
num_epochs = 10
batch_size = 4
accumulation_steps = 4
nb_features_catapult = 6
nb_features_jm = 20

# Seed
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
gnr = torch.Generator().manual_seed(seed)

# Dictionary to store model and mae for each player
models = {}
mae_dict = {}
prediction_by_player = {}

# File path definitions
train_catapult_path = "data/catapult_by_player_final/"
train_jm_path = "data/jm_clean_by_player/"
test_catapult_path = "data/test/catapult_by_player_normalized/"
test_jm_path = "data/test/jm_by_player/"

# Get list of train files and of test entries
train_jm_files = os.listdir(train_jm_path)
test_jm_files = os.listdir(test_jm_path)

# Train and test for each player -------------------------------------------- #
for train_file in train_jm_files:
    # The player id is the file name up to its first dot
    player_id = train_file.split(".")[0]

    # Test JM data lives in one entry per player, named after the player id
    if player_id not in test_jm_files:
        print(f"{player_id} not in test")
        continue

    # Create dataset loaders
    train_catapult_loader, train_jm_loader = create_catapult_jm_loader(train_catapult_path, train_jm_path, player_id, batch_size, gnr)
    test_catapult_loader, test_jm_loader = create_catapult_jm_loader(test_catapult_path, test_jm_path + player_id + "/", player_id, batch_size, gnr, is_train=False)

    # Training (one fresh model per player)
    model = FatigueNet(nb_features_catapult, nb_features_jm)
    model.train_with_gradient_accumulation(train_catapult_loader, train_jm_loader, num_epochs, accumulation_steps, device)
    models[player_id] = model

    # Testing
    test_mae = model.evaluate_model(test_catapult_loader, test_jm_loader, device)
    mae_dict[player_id] = test_mae
    print(f'MAE on test dataset : {test_mae}')

# Average MAE  ------------------------------------------------------------- #
values = list(mae_dict.values())
if values:
    average = sum(values) / len(values)
    print(average)
else:
    # No player had both train and test data: the average is undefined
    average = float("nan")
    print("No player was evaluated; average MAE is undefined")

MAE with one model per player: 1.7521

MAE with a single global model: 3.1936