Universidad Torcuato Di Tella

Licenciatura en Tecnología Digital\
**Tecnología Digital VI: Inteligencia Artificial**


In [4]:
import os
import torch
import torchaudio
import tarfile
import wandb
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from torch.utils.data import Dataset
from torchaudio.datasets import GTZAN
from torch.utils.data import DataLoader
import torchaudio.transforms as tt
from torch.utils.data import random_split
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"


# TP3: Encodeador de música



## Orden de pasos

0. Elijan GPU para que corra mas rapido (RAM --> change runtime type --> T4 GPU)
1. Descargamos el dataset y lo descomprimimos en alguna carpeta en nuestro drive.
2. Conectamos la notebook a gdrive y seteamos data_dir con el path a los archivos.
3. Visualización de los archivos
4. Clasificación
5. Evaluación




In [5]:
project_name='TP3-TD6'
username = "sansonmariano-universidad-torcuato-di-tella"
wandb.login(key="d2875c91a36209496ee81454cccd95ebe3dc948d")
wandb.init(project = project_name, entity = username)

wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: jaimeamigo (sansonmariano-universidad-torcuato-di-tella). Use `wandb login --relogin` to force relogin
wandb: Appending key for api.wandb.ai to your netrc file: C:\Users\felip\_netrc


In [6]:
random_seed = 42

torch.manual_seed(random_seed)

# Definir parámetros
samplerate = 22050
data_dir = './genres_5sec'

init_batch_size = 20
init_num_epochs = 10
init_lr = 0.0005

In [7]:
# Función para parsear géneros
def parse_genres(fname):
    parts = fname.split('/')[-1].split('.')[0]
    return parts

# Definir la clase del dataset
class MusicDataset(Dataset):
    def __init__(self, root):
        super().__init__()
        self.root = root
        self.files = []
        for c in os.listdir(root):
            self.files += [os.path.join(root, c, fname) for fname in os.listdir(os.path.join(root, c)) if fname.endswith('.wav')]
        self.classes = list(set(parse_genres(fname) for fname in self.files))

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        fpath = self.files[idx]
        genre = parse_genres(fpath)
        class_idx = self.classes.index(genre)
        audio = torchaudio.load(fpath)[0]
        return audio, class_idx

In [8]:
import torch
import torchaudio
from torch.utils.data import DataLoader, Subset, Dataset
from sklearn.model_selection import StratifiedShuffleSplit

# Function to normalize the audio data across the dataset
def normalize_audio_data(dataset):
    """
    Normalize the dataset by calculating the mean and standard deviation
    of all audio samples and then applying standardization.
    """
    all_data = []
    for i in range(len(dataset)):
        audio, _ = dataset[i]
        all_data.append(audio)
    stacked_data = torch.cat(all_data, dim=1)  # Concatenate along the time dimension for mean/std calculation
    mean = stacked_data.mean()
    std = stacked_data.std()

    # Apply normalization to each sample
    normalized_data = []
    for i in range(len(dataset)):
        audio, label = dataset[i]
        normalized_audio = (audio - mean) / std
        normalized_data.append((normalized_audio, label))

    return normalized_data

# Function to create stratified train, validation, and test DataLoaders
def create_dataloaders(dataset, batch_size, test_size=0.3, val_size=0.5, random_state=42):
    """
    Normalizes the dataset, splits it into train, validation, and test subsets,
    and returns corresponding DataLoaders.
    """
    # Normalize the dataset
    normalized_data = normalize_audio_data(dataset)

    # Extract labels for stratified splitting
    labels = [label for _, label in normalized_data]

    # Stratified split: train and temporary (val+test) split
    split = StratifiedShuffleSplit(n_splits=1, test_size=test_size, random_state=random_state)
    for train_idx, temp_idx in split.split(range(len(normalized_data)), labels):
        train_dataset = Subset(normalized_data, train_idx)
        temp_dataset = Subset(normalized_data, temp_idx)

    # Stratified split on temp data: validation and test split
    val_test_labels = [labels[i] for i in temp_idx]
    split = StratifiedShuffleSplit(n_splits=1, test_size=val_size, random_state=random_state)
    for val_idx, test_idx in split.split(temp_idx, val_test_labels):
        val_dataset = Subset(normalized_data, [temp_idx[i] for i in val_idx])
        test_dataset = Subset(normalized_data, [temp_idx[i] for i in test_idx])

    # Create DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True, num_workers=0)

    return train_loader, val_loader, test_loader

# Usage example with MusicDataset
dataset = MusicDataset(data_dir)
batch_size = 20
train_loader, val_loader, test_loader = create_dataloaders(dataset, batch_size)


In [9]:
list_files=os.listdir(data_dir)

classes = []

for file in list_files:

  name='{}/{}'.format(data_dir,file)

  if os.path.isdir(name):

    classes.append(file)

### 3. Visualización de los archivos

In [10]:
def audio_to_spectrogram(waveform):
    # Ensure the waveform is in the correct shape
    if len(waveform.shape) == 1:
        waveform = waveform.unsqueeze(0)
    
    # Convert the waveform to a spectrogram
    spectrogram = tt.Spectrogram()(waveform)
    return spectrogram

def process_dataloader_to_spectrograms(dataloader):
    spectrograms = []
    
    for batch in dataloader:
        # Assuming the batch is a tuple (waveforms, labels) and waveforms are the audio data
        waveforms, labels = batch
        
        # Process each waveform in the batch
        batch_spectrograms = [audio_to_spectrogram(waveform) for waveform in waveforms]
        
        # Append to the list of spectrograms
        spectrograms.append((torch.stack(batch_spectrograms), labels))
    
    return spectrograms

In [11]:
input_channels = 1  # for RGB images, or 1 for grayscale
num_classes = 10    # depends on your specific classification task

# Move the model to the appropriate device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [12]:
train_spectogram = process_dataloader_to_spectrograms(train_loader)
val_spectogram = process_dataloader_to_spectrograms(val_loader)
test_spectogram = process_dataloader_to_spectrograms(test_loader)

### Ejercicio 1

Modelo que recibe el tamaño de la capa y la cantidad de capas

In [13]:
# Ajuste en la clase del modelo
class ExperimentNN(nn.Module):
    def __init__(self, input_size, num_classes, layer_size, layers):
        super(ExperimentNN, self).__init__()
        self.fc_layers = nn.ModuleList()
        self.dropout = nn.Dropout(p=0.5)  # Dropout layer with 50% probability

        for layer in range(layers - 1):
            if layer == 0:
                self.fc_layers.append(nn.Linear(input_size, layer_size))
            else:
                self.fc_layers.append(nn.Linear(layer_size, layer_size))
        
        # Output layer
        self.fc_layers.append(nn.Linear(layer_size, num_classes))

    def forward(self, x):
        x = x.view(x.size(0), -1)  # Aplanar la onda de audio
        x = (x - x.mean()) / x.std()  # Normalización
        for fc in self.fc_layers[:-1]:  # Skip last layer
            x = self.dropout(F.relu(fc(x)))  # ReLU + Dropout
        x = self.fc_layers[-1](x)  # Output layer (no activation)
        return x

In [14]:
import gc
import copy

def train_model(model, criterion, optimizer, scheduler, epochs, train_loader, val_loader, device):
    best_loss = float("inf")
    best_model_state = None  # To store the best model's state

    for epoch in range(epochs):
        # Training loop
        model.train()
        train_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        train_loss /= len(train_loader)
        print(f"Epoch {epoch+1}/{epochs} - Train loss: {train_loss:.4f}")

        # Validation step
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
        val_loss /= len(val_loader)
        print(f"Validation loss: {val_loss:.4f}")

        # Check if the current validation loss is the best
        if val_loss < best_loss:
            best_loss = val_loss
            best_model_state = copy.deepcopy(model.state_dict())  # Save model state
            print(f"New best validation loss: {best_loss:.4f}")

        # Update learning rate with scheduler
        scheduler.step(val_loss)
        
        # Clear cache and collect garbage
        gc.collect()
        torch.cuda.empty_cache()

    # Load the best model's state into the model and return it
    model.load_state_dict(best_model_state)
    return best_loss, model  # Return the final best model


In [15]:

input_size = samplerate * 5

num_classes = len(dataset.classes)  # Número de clases (géneros musicales)

# Define nuevos valores de hiperparámetros para experimentar
layers_list = [2, 3, 5, 7]       # Pruebas con más capas
sizes_list = [32, 64, 128, 256]  # Pruebas con más unidades en cada capa

best_val_loss = float("inf")
best_val_accuracy = 0
learning_rate = 0.0005
weight_decay = 1e-4
best_model = None

# Loop de experimentación
for layers in layers_list:
    for size in sizes_list:
        print(f"Experimentando con {layers} capas y {size} unidades por capa...")
        
        # Inicializar el modelo con la configuración actual
        model = ExperimentNN(input_size,
                             num_classes,
                             size,
                             layers).to(device)

        # Definir el criterio y optimizador con los pesos de las clases
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

        # Define el scheduler
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

        # Llama a train_model pasándole el scheduler
        validation_loss, temp_model = train_model(
            model, criterion, optimizer, scheduler=scheduler, epochs=15,
            train_loader=train_loader, val_loader=val_loader, device=device
        )

        # Calcular precisión de validación
        temp_model.eval()
        correct = 0
        total = len(val_loader.dataset)
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = temp_model(inputs)  # Use temp_model here
                predicted = outputs.argmax(dim=1)
                correct += (predicted == labels).sum().item()
        val_accuracy = 100 * correct / total

        print(f"Loss de validación para {layers} capas y {size} unidades: {validation_loss}")
        print(f"Precisión de validación: {val_accuracy:.2f}%")

        # Guardar el modelo con mejor precisión y menor pérdida
        if val_accuracy > best_val_accuracy or (val_accuracy == best_val_accuracy and validation_loss < best_val_loss):
            best_val_loss = validation_loss
            best_val_accuracy = val_accuracy
            best_model = temp_model  # Store the model with the best configuration
            print(f"Nuevo mejor modelo encontrado con {layers} capas y {size} unidades.")

print(f"Mejor modelo: {layers} capas y {size} unidades, con precisión de validación de {best_val_accuracy:.2f}% y pérdida de validación de {best_val_loss:.4f}")


Experimentando con 2 capas y 32 unidades por capa...
Epoch 1/15 - Train loss: 3.5379
Validation loss: 4.0607
New best validation loss: 4.0607




Epoch 2/15 - Train loss: 2.8485
Validation loss: 4.6007
Epoch 3/15 - Train loss: 1.9416
Validation loss: 4.9296
Epoch 4/15 - Train loss: 1.6447
Validation loss: 5.3143
Epoch 5/15 - Train loss: 1.4859
Validation loss: 5.4273
Epoch 6/15 - Train loss: 1.2327
Validation loss: 5.2674
Epoch 7/15 - Train loss: 1.1366
Validation loss: 5.2504
Epoch 8/15 - Train loss: 0.9605
Validation loss: 5.3706
Epoch 9/15 - Train loss: 0.8903
Validation loss: 5.1963
Epoch 10/15 - Train loss: 1.0175
Validation loss: 5.7369
Epoch 11/15 - Train loss: 0.9583
Validation loss: 5.6599
Epoch 12/15 - Train loss: 0.8215
Validation loss: 5.1936
Epoch 13/15 - Train loss: 0.7246
Validation loss: 5.3675
Epoch 14/15 - Train loss: 0.6201
Validation loss: 5.3349
Epoch 15/15 - Train loss: 0.6945
Validation loss: 5.3079
Loss de validación para 2 capas y 32 unidades: 4.060677021741867
Precisión de validación: 8.78%
Nuevo mejor modelo encontrado con 2 capas y 32 unidades.
Experimentando con 2 capas y 64 unidades por capa...
Epoc

In [16]:
# Evaluación final en el conjunto de prueba
print("Evaluando el mejor modelo en el conjunto de prueba...")
best_model.eval()
test_loss = 0.0
correct = 0
total = len(test_loader.dataset)
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = best_model(inputs)

        loss = criterion(outputs, labels)
        test_loss += loss.item()

        predicted = outputs.argmax(dim=1)
        correct += (predicted == labels).sum().item()

test_loss /= len(test_loader)
test_accuracy = 100 * correct / total

print(f"Loss en el conjunto de prueba: {test_loss:.4f}")
print(f"Precisión en el conjunto de prueba: {test_accuracy:.2f}%")

Evaluando el mejor modelo en el conjunto de prueba...
Loss en el conjunto de prueba: 2.4154
Precisión en el conjunto de prueba: 15.44%


### 4. Clasificación

In [17]:
class CNN(nn.Module):
    def __init__(self, input_channels, num_classes, conv_layers_config):
        super(CNN, self).__init__()

        # Initialize the list to hold the convolutional layers
        self.conv_layers = nn.ModuleList()

        # Initialize the number of input channels for the first layer
        in_channels = input_channels

        # Dynamically create convolutional layers based on the configuration
        for (out_channels, kernel_size, stride, padding) in conv_layers_config:
            self.conv_layers.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding))
            in_channels = out_channels  # Update in_channels for the next layer

        # Calculate the size after convolution and pooling to define the fully connected layer
        # Assuming pooling reduces the size by a factor of 2 at each layer
        self.pool = nn.MaxPool2d(2, 2)
        
        # Get the final feature map size (after all conv and pooling layers)
        self.final_feature_map_size = self._get_conv_output_size(input_channels, conv_layers_config)
        
        # Define 9 fully connected layers with 256 nodes each
        self.fc_layers = nn.ModuleList()
        self.fc_layers.append(nn.Linear(self.final_feature_map_size, 64))  # First fully connected layer
        for _ in range(2):  # Add 8 more fully connected layers with 256 nodes
            self.fc_layers.append(nn.Linear(64, 64))
        
        # Output layer
        self.fc_out = nn.Linear(64, num_classes)  # Output layer for classification
        
    def _get_conv_output_size(self, input_channels, conv_layers_config):
        # Sample input size (height x width) to calculate the final feature map size
        # You can adjust these values based on your actual input size
        height = 201  # Replace with your actual input height
        width = 552   # Replace with your actual input width
        
        # Apply each convolutional and pooling layer
        for (out_channels, kernel_size, stride, padding) in conv_layers_config:
            height = (height + 2 * padding - kernel_size) // stride + 1
            width = (width + 2 * padding - kernel_size) // stride + 1
            height = height // 2  # Max pooling halves the height
            width = width // 2    # Max pooling halves the width
        
        # Return the total number of features after all convolutional and pooling layers
        return out_channels * height * width

    def forward(self, x):
        # Apply each convolutional layer followed by ReLU and pooling
        for conv_layer in self.conv_layers:
            x = F.relu(conv_layer(x))
            x = self.pool(x)
        
        # Flatten the output before passing it to the fully connected layers
        x = x.view(x.size(0), -1)  # Flatten the feature map

        # Apply the fully connected layers
        for fc in self.fc_layers:
            x = F.relu(fc(x))
        
        # Output layer (classification)
        x = self.fc_out(x)

        return x


In [18]:
import copy

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10, device="cpu"):
    """
    Train the model and select the best gradients based on validation loss.
    """
    model.train()  # Set the model to training mode
    best_val_loss = float("inf")
    best_gradients = None  # To store gradients with the lowest validation loss
    best_accuracy = 0

    for epoch in range(num_epochs):
        running_loss = 0.0
        print(f"\nStarting Epoch {epoch+1}/{num_epochs}")
        
        # Training loop
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            
            # Zero the parameter gradients
            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            # Backward pass and optimize
            loss.backward()
            optimizer.step()
            
            # Accumulate training loss
            running_loss += loss.item()
            
            # Print loss for every batch
            if (i + 1) % 10 == 0 or (i + 1) == len(train_loader):
                print(f"  Batch {i+1}/{len(train_loader)} - Loss: {loss.item():.4f}")
        
        # Calculate average training loss for the epoch
        avg_train_loss = running_loss / len(train_loader)
        print(f"Epoch {epoch+1} Training completed. Average Loss: {avg_train_loss:.4f}")
        
        # Validation phase
        model.eval()
        val_loss = 0.0
        correct = 0
        total = len(val_loader)
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
        
        accuracy = 100 * correct / total

        avg_val_loss = val_loss / total
        print(f"Epoch {epoch+1} Validation completed. Average Loss: {avg_val_loss:.4f}")
        
        # If this epoch has the best (lowest) validation loss, save the gradients
        if (accuracy > best_accuracy) or ((accuracy == best_accuracy) and (avg_val_loss < best_val_loss)):
            best_val_loss = avg_val_loss
            best_accuracy = accuracy
            # Capture gradients
            best_gradients = {name: param.grad.clone() for name, param in model.named_parameters() if param.grad is not None}
            print(f"New best gradients stored for epoch {epoch+1} with validation loss {best_val_loss:.4f} and accuracy {accuracy}%")
        
        # Set model back to training mode
        model.train()

    print("\nTraining complete.")
    
    return best_gradients

def test_model_configuration(model, test_loader, criterion, device):
    """
    Evaluate the model on the test set.
    """
    model.eval()  # Set the model to evaluation mode
    test_loss = 0.0
    correct = 0
    total = 0  # Total number of samples processed

    # Ensure no gradient computation during evaluation
    with torch.no_grad():
        for inputs, labels in test_loader:  # Assuming test_loader is the correct DataLoader for your test set
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass
            outputs = model(inputs)

            # Compute the loss
            loss = criterion(outputs, labels)
            test_loss += loss.item()

            # Get the predicted class labels (the one with the highest logit)
            _, predicted = torch.max(outputs.data, 1)

            # Accumulate total samples
            total += labels.size(0)

            # Accumulate correct predictions
            correct += (predicted == labels).sum().item()

    # Calculate average test loss
    test_loss /= len(test_loader)

    # Calculate accuracy
    accuracy = 100 * correct / total

    return test_loss, accuracy

def test_multiple_configurations(train, val, test, criterion, device, num_epochs=10):
    """
    Test multiple model configurations, applying the best gradients for evaluation,
    and save the best model based on accuracy or validation loss.
    """
    # Different configurations for the CNN model
    configurations = [
        [(32, 3, 1, 1), (64, 3, 1, 1), (128, 3, 1, 1)],  # Configuration 1
        [(32, 5, 1, 2), (64, 5, 1, 2)],                  # Configuration 2
        [(16, 3, 1, 1), (32, 3, 1, 1), (64, 3, 1, 1)],   # Configuration 3
        [(64, 3, 1, 1), (128, 3, 1, 1), (256, 3, 1, 1)]  # Configuration 4
    ]
    
    best_model = None
    best_accuracy = 0.0
    best_loss = float("inf")
    
    for idx, conv_layers_config in enumerate(configurations):
        print(f"\nTesting Configuration {idx + 1} with convolutional layers: {conv_layers_config}")
        
        # Initialize the model with the current configuration
        model = CNN(input_channels=1, num_classes=10, conv_layers_config=conv_layers_config)
        model.to(device)
        
        # Initialize optimizer (e.g., Adam) and train the model
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        
        # Train the model on the training set
        best_gradients = train_model(model, train, val, criterion, optimizer, num_epochs, device)
        
        # Apply the best gradients to the model parameters
        if best_gradients is not None:
            with torch.no_grad():
                for name, param in model.named_parameters():
                    if name in best_gradients and param.grad is not None:
                        param.grad.copy_(best_gradients[name])
        
        # Evaluate the model on the test set with best gradients applied
        test_loss, accuracy = test_model_configuration(model, test, criterion, device)
        
        # Print the results for the current configuration
        print(f"Configuration {idx + 1} Test Loss: {test_loss:.4f}, Test Accuracy: {accuracy:.2f}%")
        
        # Save the best model based on accuracy; if accuracy is the same, use loss as the tie-breaker
        if accuracy > best_accuracy or (accuracy == best_accuracy and test_loss < best_loss):
            best_accuracy = accuracy
            best_loss = test_loss
            best_model = copy.deepcopy(model)
            print(f"New best model found with accuracy: {best_accuracy:.2f}% and loss: {best_loss:.4f}")

    print(f"\nBest Model Test Accuracy: {best_accuracy:.2f}% with Loss: {best_loss:.4f}")
    return best_loss, best_model

In [19]:
# Assume 'test_loader' is the DataLoader for your test set, and 'criterion' is the loss function (e.g., CrossEntropyLoss)
best_model = test_multiple_configurations(train_spectogram, val_spectogram, test_spectogram, criterion, device, 10)


Testing Configuration 1 with convolutional layers: [(32, 3, 1, 1), (64, 3, 1, 1), (128, 3, 1, 1)]

Starting Epoch 1/10
  Batch 10/35 - Loss: 2.4088
  Batch 20/35 - Loss: 2.3754
  Batch 30/35 - Loss: 2.2925
  Batch 35/35 - Loss: 2.3715
Epoch 1 Training completed. Average Loss: 30.7698
Epoch 1 Validation completed. Average Loss: 2.3298
New best gradients stored for epoch 1 with validation loss 2.3298 and accuracy 0.0%

Starting Epoch 2/10
  Batch 10/35 - Loss: 2.2604
  Batch 20/35 - Loss: 2.1988
  Batch 30/35 - Loss: 2.1564
  Batch 35/35 - Loss: 1.9377
Epoch 2 Training completed. Average Loss: 2.2236
Epoch 2 Validation completed. Average Loss: 2.6260

Starting Epoch 3/10
  Batch 10/35 - Loss: 2.2124
  Batch 20/35 - Loss: 2.2867
  Batch 30/35 - Loss: 2.2275
  Batch 35/35 - Loss: 2.1500
Epoch 3 Training completed. Average Loss: 2.2205
Epoch 3 Validation completed. Average Loss: 2.2919
New best gradients stored for epoch 3 with validation loss 2.2919 and accuracy 0.0%

Starting Epoch 4/10


In [20]:
print(f"Loss: {best_model[0]} - Model: {best_model[1]}")

Loss: 5.647017657756805 - Model: CNN(
  (conv_layers): ModuleList(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc_layers): ModuleList(
    (0): Linear(in_features=110400, out_features=64, bias=True)
    (1-2): 2 x Linear(in_features=64, out_features=64, bias=True)
  )
  (fc_out): Linear(in_features=64, out_features=10, bias=True)
)


In [21]:
cnn_best_model = best_model[1]

test_model_configuration(cnn_best_model,test_spectogram,criterion, device)

(5.647017657756805, 24.161073825503355)

In [22]:
class different_act_CNN(nn.Module):
    def __init__(self, input_channels=1, num_classes=10, activation_function="relu"):
        super(different_act_CNN, self).__init__()

        # Define the convolutional layers (fixed as per example)
        self.conv_layers = nn.ModuleList([
            nn.Conv2d(input_channels, 32, kernel_size=3, stride=1, padding=1),  # Conv layer 1
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),              # Conv layer 2
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)              # Conv layer 3
        ])
        
        # Max Pooling layer
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Calculate the output feature size after conv layers and pooling
        self.final_feature_map_size = self._get_conv_output_size(input_channels)

        # Define fully connected layers (fixed as per example)
        self.fc_layers = nn.ModuleList([
            nn.Linear(self.final_feature_map_size, 64),  # First fully connected layer
            nn.Linear(64, 64),                           # Second fully connected layer
            nn.Linear(64, 64)                            # Third fully connected layer
        ])

        # Output layer
        self.fc_out = nn.Linear(64, num_classes)

        # Set activation function
        self.activation = self._get_activation_function(activation_function)

    def _get_conv_output_size(self, input_channels):
        # Sample input size (height x width)
        height, width = 201, 552  # Replace with actual input height and width

        # Pass through convolutional and pooling layers to determine final size
        for layer in self.conv_layers:
            height = (height + 2 * layer.padding[0] - layer.kernel_size[0]) // layer.stride[0] + 1
            width = (width + 2 * layer.padding[1] - layer.kernel_size[1]) // layer.stride[1] + 1
            height //= 2  # Max pooling halves the height
            width //= 2   # Max pooling halves the width

        # Output feature map size
        return height * width * 128

    def _get_activation_function(self, activation_function):
        # Map the string to the appropriate activation function
        if activation_function == "relu":
            return F.relu
        elif activation_function == "leaky_relu":
            return F.leaky_relu
        elif activation_function == "tanh":
            return torch.tanh
        elif activation_function == "sigmoid":
            return torch.sigmoid
        elif activation_function == "softmax":
            return F.softmax
        elif activation_function == "elu":
            return F.elu
        elif activation_function == "selu":
            return F.selu
        elif activation_function == "gelu":
            return F.gelu
        elif activation_function == "swish":
            return lambda x: x * torch.sigmoid(x)  # Swish activation: x * sigmoid(x)
        elif activation_function == "hard_sigmoid":
            return F.hardsigmoid
        else:
            raise ValueError(f"Unsupported activation function: {activation_function}")

    def forward(self, x):
        # Forward pass through conv layers with the selected activation function and pooling
        for conv_layer in self.conv_layers:
            x = self.activation(conv_layer(x))
            x = self.pool(x)
        
        # Flatten the output from conv layers
        x = x.view(x.size(0), -1)

        # Forward pass through fully connected layers with the selected activation function
        for fc_layer in self.fc_layers:
            x = self.activation(fc_layer(x))
        
        # Final output layer
        x = self.fc_out(x)

        return x

In [23]:
# Function to test different activation functions
def test_multiple_activation_functions(train, val, test, criterion, device, num_epochs=10):
    activation_functions = ["relu", "leaky_relu", "tanh", "sigmoid", "softmax", "elu", "selu", "gelu", "swish", "hard_sigmoid"]
    
    best_model = None
    best_accuracy = 0.0
    best_loss = float("inf")
    
    for activation_function in activation_functions:
        print(f"\nTesting activation function: {activation_function}")
        
        model = different_act_CNN(input_channels=1, num_classes=10, activation_function=activation_function)
        model.to(device)
        
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        
        best_gradients = train_model(model, train, val, criterion, optimizer, num_epochs, device)
        
        if best_gradients is not None:
            with torch.no_grad():
                for name, param in model.named_parameters():
                    if name in best_gradients and param.grad is not None:
                        param.grad.copy_(best_gradients[name])
        
        test_loss, accuracy = test_model_configuration(model, test, criterion, device)
        
        print(f"Activation function: {activation_function} Test Loss: {test_loss:.4f}, Test Accuracy: {accuracy:.2f}%")
        
        if accuracy > best_accuracy or (accuracy == best_accuracy and test_loss < best_loss):
            best_accuracy = accuracy
            best_loss = test_loss
            best_model = copy.deepcopy(model)
            print(f"New best model found with accuracy: {best_accuracy:.2f}% and loss: {best_loss:.4f}")

    print(f"\nBest Model Test Accuracy: {best_accuracy:.2f}% with Loss: {best_loss:.4f}")
    return best_loss, best_model

In [24]:
# Assume 'test_loader' is the DataLoader for your test set, and 'criterion' is the loss function (e.g., CrossEntropyLoss)
best_loss, best_model = test_multiple_activation_functions(train_spectogram, val_spectogram, test_spectogram, criterion, device, 10)


Testing activation function: relu

Starting Epoch 1/10
  Batch 10/35 - Loss: 3.7923
  Batch 20/35 - Loss: 2.1062
  Batch 30/35 - Loss: 2.2496
  Batch 35/35 - Loss: 2.1477
Epoch 1 Training completed. Average Loss: 26.8719
Epoch 1 Validation completed. Average Loss: 2.2374
New best gradients stored for epoch 1 with validation loss 2.2374 and accuracy 0.0%

Starting Epoch 2/10
  Batch 10/35 - Loss: 1.9306
  Batch 20/35 - Loss: 2.1099
  Batch 30/35 - Loss: 2.0890
  Batch 35/35 - Loss: 2.0647
Epoch 2 Training completed. Average Loss: 2.1597
Epoch 2 Validation completed. Average Loss: 2.2873

Starting Epoch 3/10
  Batch 10/35 - Loss: 1.9281
  Batch 20/35 - Loss: 1.9140
  Batch 30/35 - Loss: 1.9856
  Batch 35/35 - Loss: 1.0595
Epoch 3 Training completed. Average Loss: 1.9783
Epoch 3 Validation completed. Average Loss: 2.6419

Starting Epoch 4/10
  Batch 10/35 - Loss: 1.4790
  Batch 20/35 - Loss: 1.7335
  Batch 30/35 - Loss: 1.5896
  Batch 35/35 - Loss: 0.8898
Epoch 4 Training completed. Aver

  x = self.activation(conv_layer(x))
  x = self.activation(fc_layer(x))


  Batch 10/35 - Loss: 2.2922
  Batch 20/35 - Loss: 2.3140
  Batch 30/35 - Loss: 2.2875
  Batch 35/35 - Loss: 2.3146
Epoch 1 Training completed. Average Loss: 2.3056
Epoch 1 Validation completed. Average Loss: 2.3068
New best gradients stored for epoch 1 with validation loss 2.3068 and accuracy 0.0%

Starting Epoch 2/10
  Batch 10/35 - Loss: 2.2926
  Batch 20/35 - Loss: 2.3122
  Batch 30/35 - Loss: 2.2891
  Batch 35/35 - Loss: 2.3119
Epoch 2 Training completed. Average Loss: 2.3048
Epoch 2 Validation completed. Average Loss: 2.3061
New best gradients stored for epoch 2 with validation loss 2.3061 and accuracy 0.0%

Starting Epoch 3/10
  Batch 10/35 - Loss: 2.2934
  Batch 20/35 - Loss: 2.3104
  Batch 30/35 - Loss: 2.2906
  Batch 35/35 - Loss: 2.3094
Epoch 3 Training completed. Average Loss: 2.3044
Epoch 3 Validation completed. Average Loss: 2.3055
New best gradients stored for epoch 3 with validation loss 2.3055 and accuracy 0.0%

Starting Epoch 4/10
  Batch 10/35 - Loss: 2.2942
  Batch 

In [38]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR, ExponentialLR, ReduceLROnPlateau
import torch.nn.functional as F
import copy

class DifferentOptCNN(nn.Module):
    def __init__(self, optimizer_class, scheduler_fn, learning_rate, scheduler_lr, input_channels=1, num_classes=10):
        super(DifferentOptCNN, self).__init__()

        # Capas convolucionales con ajustes en kernel_size y stride para evitar reducción excesiva
        self.conv_layers = nn.ModuleList([
            nn.Conv2d(input_channels, 32, kernel_size=5, stride=2, padding=2),  # Conv layer 1
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),              # Conv layer 2
            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)              # Conv layer 3
        ])
        
        # Max Pooling layer con tamaño de kernel ajustado
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Calcular el tamaño final después de las convoluciones
        self.final_feature_map_size = self._get_conv_output_size(input_channels)

        # Capas densas (sin cambios)
        self.fc_layers = nn.ModuleList([
            nn.Linear(self.final_feature_map_size, 64),
            nn.Linear(64, 64),
            nn.Linear(64, 64)
        ])

        # Capa de salida
        self.fc_out = nn.Linear(64, num_classes)

        # Definir función de activación ELU
        self.activation = F.elu

        # Configuración del optimizador y scheduler
        self.optimizer = optimizer_class(self.parameters(), lr=learning_rate)
        self.scheduler = scheduler_fn(self.optimizer, scheduler_lr)

    def _get_conv_output_size(self, input_channels):
        # Tamaño de entrada (ajustar si es necesario)
        height, width = 201, 552  # Reemplaza con el tamaño correcto de entrada si cambió

        # Cálculo del tamaño de salida
        for layer in self.conv_layers:
            height = (height + 2 * layer.padding[0] - layer.kernel_size[0]) // layer.stride[0] + 1
            width = (width + 2 * layer.padding[1] - layer.kernel_size[1]) // layer.stride[1] + 1
            height //= 2  # Max pooling reduce la altura a la mitad
            width //= 2   # Max pooling reduce el ancho a la mitad

        return height * width * 128  # Tamaño de la característica de salida final


    def forward(self, x):
        # Pasar por las capas convolucionales con la función de activación ELU y pooling
        for conv_layer in self.conv_layers:
            x = self.activation(conv_layer(x))
            x = self.pool(x)
        
        # Aplanar la salida de las capas convolucionales antes de pasar por las densas
        x = x.view(x.size(0), -1)

        # Pasar por las capas densas con la función de activación ELU
        for fc_layer in self.fc_layers:
            x = self.activation(fc_layer(x))
        
        # Capa de salida final
        x = self.fc_out(x)

        return x


In [39]:
def grid_search_model_configurations(train, val, test, criterion, device, num_epochs=10):
    # Hiperparámetros a probar
    optimizers = {
        "SGD": optim.SGD,
        "Adam": optim.Adam,
        "RMSprop": optim.RMSprop
    }
    schedulers = {
        "StepLR": lambda opt, lr: StepLR(opt, step_size=5, gamma=lr),
        "ExponentialLR": lambda opt, lr: ExponentialLR(opt, gamma=lr),
        "ReduceLROnPlateau": lambda opt, lr: ReduceLROnPlateau(opt, mode='min', patience=3, factor=lr)
    }
    learning_rates = [0.001, 0.01, 0.1]

    best_model = None
    best_accuracy = 0.0
    best_loss = float("inf")
    best_config = {}


    for optimizer_name, optimizer_class in optimizers.items():
        for scheduler_name, scheduler_fn in schedulers.items():
            for lr in learning_rates:
                print(f"\nTesting config: Optimizer={optimizer_name}, Scheduler={scheduler_name}, Learning Rate={lr}")

                # Inicializar el modelo con la función de activación fija
                model = DifferentOptCNN(
                    optimizer_class=optimizer_class,
                    scheduler_fn=scheduler_fn,
                    learning_rate=lr,
                    scheduler_lr=lr,
                    input_channels=20,  # Ajuste según el tamaño de los datos
                    num_classes=10
                )
                model.to(device)

                # Entrenar el modelo
                best_gradients = train_model(model, train, val, criterion, model.optimizer, num_epochs, device)

                # Evaluar el modelo en el conjunto de prueba
                test_loss, accuracy = test_model_configuration(model, test, criterion, device)
                
                print(f"Config: Optimizer={optimizer_name}, Scheduler={scheduler_name}, LR={lr} | Test Loss: {test_loss:.4f}, Test Accuracy: {accuracy:.2f}%")

                # Guardar el mejor modelo según la precisión y la pérdida en el conjunto de prueba
                if accuracy > best_accuracy or (accuracy == best_accuracy and test_loss < best_loss):
                    best_accuracy = accuracy
                    best_loss = test_loss
                    best_model = copy.deepcopy(model)
                    best_config = {
                        "optimizer": optimizer_name,
                        "scheduler": scheduler_name,
                        "learning_rate": lr
                    }
                    print(f"New best model found with accuracy: {best_accuracy:.2f}% and loss: {best_loss:.4f}")

    print(f"\nBest Model Test Accuracy: {best_accuracy:.2f}% with Loss: {best_loss:.4f}")
    print(f"Best Configuration: {best_config}")
    return best_loss, best_model, best_config

# Ejemplo de uso:
grid_search_model_configurations(train_loader, val_loader, test_loader, criterion, device, num_epochs=10)


Testing config: Optimizer=SGD, Scheduler=StepLR, Learning Rate=0.001

Starting Epoch 1/10


RuntimeError: Given input size: (32x1x55125). Calculated output size: (32x0x27562). Output size is too small