In [21]:
import pandas as pd
import os 
import numpy as np  
import librosa 
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
import torch  
import torch.nn as nn   
from typing import List
import pytorch_lightning as pl  
import torch.nn.functional as F 

In [4]:
# Load the label table; later cells read its 'file_name' and 'root_note' columns
# and drop 'label', 'instrument', 'type_of_sound' and 'chord_type'.
audio_dataframe = pd.read_csv(filepath_or_buffer='label_dataframe.csv')

In [9]:
class CustomDataset(Dataset):
    """Map-style dataset that loads one audio clip per item and returns ``(label, feature)``.

    Args:
        dataframe: Table with a ``file_name`` column (clip path relative to
            ``root_dir``) and a ``root_note`` column (the label of the clip).
        root_dir: Directory that every ``file_name`` entry is joined onto.
        feature: Name of the feature to extract, see :meth:`get_features`.
        transform: Optional callable applied to the raw waveform right after it
            has been loaded and before feature extraction.
    """

    def __init__(self, dataframe, root_dir, feature = 'Chromagram',transform=None):
        # Both attributes are pandas Series that keep the (possibly shuffled) index of
        # the incoming frame, so they must only be accessed positionally (``.iloc``).
        self.dataframe = dataframe['file_name']
        self.root_dir = root_dir
        self.feature = feature
        self.transform = transform
        self.label = dataframe['root_note']

    def __len__(self):
        """Return the number of clips in the dataset."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load clip number ``idx`` (positional) and return ``(label, feature)``.

        The feature is whatever :meth:`get_features` produces for ``self.feature``.
        """
        # ``self.dataframe`` is a Series, so a single positional indexer is required;
        # a 2-D indexer such as ``.iloc[idx, 0]`` raises "Too many indexers".
        audio_file = os.path.join(self.root_dir, self.dataframe.iloc[idx])

        # sr=None keeps the file's native sampling rate instead of resampling.
        x, sr = librosa.load(audio_file, sr=None)

        # The transform can only run once the waveform exists.
        if self.transform is not None:
            x = self.transform(x)

        # Positional lookup: after train_test_split the Series index is a shuffled
        # subset of the original one, so ``self.label[idx]`` would look up by index
        # *label* and either fail or return the wrong row.
        return self.label.iloc[idx], self.get_features(x, sr, self.feature)

    def get_features(self, x, sr, feature='Chromagram'):
        """Compute the requested feature for waveform ``x`` sampled at ``sr``.

        Args:
            x: Audio samples as returned by ``librosa.load``.
            sr: Sampling rate of ``x``.
            feature: ``'Chromagram'`` or ``'Mel Spectrogram'``.

        Returns:
            The array returned by the corresponding ``librosa.feature`` function, or
            an empty ``(0, 0)`` array for any other feature name (not implemented yet).
        """
        # 88200 samples per hop, i.e. 2 s when the clip is sampled at 44.1 kHz.
        hop_length = int(44.1e3*2)

        if feature == 'Chromagram':
            # NOTE(review): some librosa versions require the CQT hop length to be a
            # multiple of 2**(n_octaves - 1); 88200 is not a multiple of 64 - confirm
            # against the installed librosa version.
            return librosa.feature.chroma_cqt(y=x, sr=sr, n_chroma=12, n_octaves=7, hop_length=hop_length)

        if feature == 'Mel Spectrogram':
            # The FFT window length equals the hop length.
            return librosa.feature.melspectrogram(y=x, sr=sr, n_mels=128, n_fft=hop_length, hop_length=hop_length)

        # TODO: implement other features. Until then unknown names yield an empty array.
        return np.empty((0, 0))

In [11]:
# Metadata columns that are not used downstream. 'file_name' and 'root_note' are not
# listed here because CustomDataset reads both from the frame it receives.
columns_to_drop = ['label', 'instrument', 'type_of_sound', 'chord_type']

X = audio_dataframe.drop(columns=columns_to_drop)
y = audio_dataframe['root_note']

# Two-stage split: hold out 20% for testing, then 25% of the remaining 80% for
# validation (60/20/20 overall). The same seed is used for both stages.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, random_state=42, test_size=0.25)

# NOTE: despite their names these are CustomDataset (torch Dataset) instances, not
# DataLoaders; the names are kept so that other cells referring to them still work.
train_dataloader, test_dataloader, val_dataloader = (
    CustomDataset(dataframe=split, root_dir='IDMT-SMT-CHORDS/trimmed_audio', feature='Chromagram')
    for split in (X_train, X_test, X_val)
)

In [23]:
class CustomLoss(torch.nn.Module):
    """Loss module that wraps the mean squared error in a one-entry dictionary."""

    def forward(self, true_labels, estimated_labels):
        """Return ``{'MMSE loss': mse}`` for the two given tensors.

        Args:
            true_labels: Tensor with the reference values.
            estimated_labels: Tensor with the model outputs.

        Returns:
            dict: Single entry keyed ``'MMSE loss'`` holding the (mean-reduced) MSE.
        """
        # TODO: replace the MSE with a (categorical) cross-entropy loss.
        mse = F.mse_loss(input=true_labels, target=estimated_labels)
        return {'MMSE loss': mse}

In [20]:
class Chordifier_MLP(pl.LightningModule):
    """Fully connected network (MLP) wrapped as a Lightning module.

    Each hidden stage is ``Linear -> ReLU -> BatchNorm1d``; the output stage is
    ``Linear -> ReLU``. Training and validation use ``CustomLoss`` (currently an MSE)
    on batches of the form ``(label, feature_vector)``.
    """

    num_iter = 0  # Class-level (static) counter meant to track the number of iterations

    # Key under which CustomLoss returns the loss tensor.
    _LOSS_KEY = 'MMSE loss'

    def __init__(self, in_channels: int = 16, out_channels: int = 1, hidden_dims: List = None, **kwargs):
        """
        Args:
            in_channels (int): Size of the input feature vector.
            out_channels (int): Size of the network output.
            hidden_dims (list): Widths of the hidden layers; defaults to ``[32, 16, 8, 4]``.
            **kwargs: Accepted for interface compatibility; not used.
        """
        super().__init__()
        # Internal state
        self.out_channels = out_channels
        self.network = None
        # Running loss sums (plain floats) and step counts since the last report.
        self.total_train_loss = 0
        self.total_val_loss = 0
        self.denom_train = 0
        self.denom_val = 0

        # Default hidden widths when none are given
        if hidden_dims is None:
            hidden_dims = [32, 16, 8, 4]
        # Build the MLP
        self.build_network(in_channels, out_channels, hidden_dims)

    def build_network(self, in_channels, out_channels, hidden_dims):
        """
        Builds a neural network with the specified architecture.

        Args:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.
            hidden_dims (list): List of integers representing the dimensions of the hidden layers.

        Returns:
            None
        """
        # Input and hidden layers
        modules = []
        for h_dim in hidden_dims:
            modules.append(nn.Sequential(
                nn.Linear(in_features=in_channels, out_features=h_dim),
                nn.ReLU(),
                nn.BatchNorm1d(num_features=h_dim)
            ))
            in_channels = h_dim
        # Output layer
        modules.append(nn.Sequential(
            nn.Linear(in_features=in_channels, out_features=out_channels),
            nn.ReLU() # Final ReLU activation: output is nonnegative (loss proxy)
        ))

        self.network = nn.Sequential(*modules)

    def forward(self, input, **kwargs):
        """
        Forward pass of the neural network model.

        Args:
            input (torch.Tensor): The input tensor to the model.

        Returns:
            torch.Tensor: The output tensor from the model.
        """
        # Follow the device the module actually lives on. Picking "cuda if available"
        # globally breaks whenever the module itself is kept on the CPU.
        input = input.to(device=self.device, dtype=torch.float32)

        return self.network(input)

    def _compute_losses(self, batch):
        """Run the model on a ``(label, feature_vector)`` batch and return the CustomLoss dict."""
        label, feature_vector = batch
        # Drop only the trailing singleton output dimension so that a batch of size 1
        # keeps its batch dimension.
        estimated_label = self(feature_vector).squeeze(-1)
        # Match dtype/device of the prediction (e.g. integer labels vs. float outputs).
        label = label.to(device=estimated_label.device, dtype=estimated_label.dtype)
        return CustomLoss()(label, estimated_label)

    def training_step(self, batch, batch_idx):
        """
        Performs a single training step.

        Args:
            batch: A tuple ``(label, feature_vector)``.
            batch_idx: The index of the current batch.

        Returns:
            The MMSE loss for the current training step.
        """
        train_loss = self._compute_losses(batch)
        self.log_dict({key: val.item() for key, val in train_loss.items()}, sync_dist=True)
        loss = train_loss[self._LOSS_KEY]
        # Accumulate a Python float: adding the tensor itself would keep every step's
        # autograd graph alive.
        self.total_train_loss += loss.item()
        self.denom_train += 1

        return loss

    def validation_step(self, batch, batch_idx):
        """
        Performs a validation step on a batch of data.

        Args:
            batch: A tuple ``(label, feature_vector)``.
            batch_idx: The index of the current batch.

        Returns:
            The MMSE loss for the validation step.
        """
        val_loss = self._compute_losses(batch)
        self.log_dict({f"val_{key}": val.item() for key, val in val_loss.items()}, sync_dist=True)
        loss = val_loss[self._LOSS_KEY]
        self.total_val_loss += loss.item()
        self.denom_val += 1

        return loss

    def on_validation_epoch_end(self):
        """
        Method called at the end of each validation epoch.

        Prints the mean training and validation losses accumulated since the previous
        call, then resets the accumulators so that the next report does not include
        earlier epochs.

        Returns:
            None
        """
        # Guard against division by zero (e.g. validation running before any training
        # step) without overwriting the real step counters.
        train_mean = self.total_train_loss / max(self.denom_train, 1)
        val_mean = self.total_val_loss / max(self.denom_val, 1)
        print(
            f"Epoch {self.current_epoch}: Train Loss = {train_mean}, Validation Loss = {val_mean}")

        self.total_train_loss = 0
        self.total_val_loss = 0
        self.denom_train = 0
        self.denom_val = 0

    def configure_optimizers(self):
        """
        Configures the optimizer for the model.

        Returns:
            optimizer (torch.optim.Optimizer): The configured optimizer.
        """
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-4)
        return optimizer
