# ML CUP - Neural Networks - Manual lr decay (and momentum)
In this notebook we implement a manual decay of the learning rate and, for the optimizers that use it, of the momentum. We then study whether this decay improves the model's performance.

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from matplotlib import pyplot as plt
from sklearn.model_selection import KFold
from itertools import product

import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.optim.lr_scheduler import ExponentialLR
from torch.optim.lr_scheduler import MultiStepLR
from torch.optim import RMSprop

from sklearn.model_selection import train_test_split

In [None]:
from IPython.display import Audio, display

# Alarm helper: call it after a long grid search to get an audible
# notification that the search has finished.
# The default file is a Windows system sound.
def play_sound(sound_file=r"C:\Windows\Media\Alarm01.wav"):
    """Play an audio file in the notebook output as an end-of-job alarm.

    Parameters:
    - sound_file (str, optional): Path of the audio file to play. The default
      points to a Windows system sound; pass your own path on other systems.

    The default path is a raw string so that its backslashes are never
    interpreted as escape sequences.
    """
    display(Audio(filename=sound_file, autoplay=True))

In [None]:
# Select the compute device: the first CUDA GPU when one is available, the CPU otherwise.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

### Regressor

In [None]:
# Defines a regression neural network

class RegressorNNVar(nn.Module):
    """
    Fully connected regression network with a configurable stack of layers.

    Parameters:
    - hidden_size (sequence of int): Width of each hidden layer; entries 0 .. num_layers-1 are read.
    - activation_function (torch.nn.Module): Activation module placed after every hidden-to-hidden
      linear layer (the same instance is reused at each position).
    - num_layers (int): Number of hidden widths to use from hidden_size.
    - dropout_prob (float): Drop probability of the Dropout placed after each activation.
    - input_size (int, optional): Number of input features (default: 10).
    - output_size (int, optional): Number of output values (default: 3).

    Resulting stack:
        Linear(input_size, h[0])
        -> (Linear(h[k], h[k+1]) -> activation -> Dropout) repeated num_layers - 1 times
        -> Linear(h[last], output_size)

    NOTE(review): no activation follows the first linear layer, so it composes linearly
    with the next one - confirm this is intended.
    """

    def __init__(self, hidden_size, activation_function, num_layers, dropout_prob, input_size=10, output_size=3):
        super().__init__()

        # Number of (Linear -> activation -> Dropout) groups between input and output layers
        n_groups = num_layers - 1

        # Input layer
        stack = [nn.Linear(input_size, hidden_size[0])]

        # Hidden layers: consecutive entries of hidden_size give the in/out widths
        for k in range(n_groups):
            stack.extend([
                nn.Linear(hidden_size[k], hidden_size[k + 1]),
                activation_function,
                nn.Dropout(p=dropout_prob),  # randomly deactivates a subset of units
            ])

        # Output layer, fed by the last hidden width that was used
        last = max(n_groups, 0)
        stack.append(nn.Linear(hidden_size[last], output_size))

        # Keep the plain list available and wrap the layers in a Sequential container
        self.layers = stack
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run x through the sequential stack and return the network output."""
        return self.model(x)

### Training model and grid search definition

In [None]:
def _decay_optimizer_params(optimizer, decay, mom_decay, min_lr, min_mom):
    """Scale lr (and momentum, when the optimizer has one) by the decay factors, never going below the floors."""
    for param_group in optimizer.param_groups:
        current_lr = param_group['lr']
        if current_lr > min_lr:
            param_group['lr'] = max(min_lr, current_lr * decay)

        # SGD and RMSprop param groups carry a 'momentum' entry; Adam's do not, so Adam is skipped here.
        current_mom = param_group.get('momentum')
        if current_mom is not None and current_mom > min_mom:
            param_group['momentum'] = max(min_mom, current_mom * mom_decay)


def training_model(x_train, y_train, x_test, y_test, proportions, neuron_number, learning_rate, momentum, batch_size,
                   reg_coeff, activation_function, num_layers, dropout, decay, mom_decay, epoch_decay, min_lr, min_mom,
                   optimizer, num_epochs=1000, plot_curves=False):

    """
    Train the regression model and evaluate it on a test set (can also be a validation set, depending on the context).

    Parameters:
    - x_train (torch.Tensor): Training input data.
    - y_train (torch.Tensor): Training target data.
    - x_test (torch.Tensor): Test input data.
    - y_test (torch.Tensor): Test target data.
    - proportions (list of floats): Fraction of the total neurons assigned to each layer.
    - neuron_number (int): Total number of neurons across all layers.
    - learning_rate (float): Initial learning rate for the optimizer.
    - momentum (float): Initial momentum for the optimizer (ignored by Adam).
    - batch_size (int): Batch size for training.
    - reg_coeff (float): Regularization coefficient, passed to the optimizer as weight decay.
    - activation_function (torch.nn.Module): Activation function for the model.
    - num_layers (int): Number of hidden layers in the model.
    - dropout (float): Dropout probability passed to the model.
    - decay (float): Multiplicative decay factor for the learning rate.
    - mom_decay (float): Multiplicative decay factor for the momentum.
    - epoch_decay (int): Number of epochs between two decay steps; 0 disables the decay.
    - min_lr (float): The decay never pushes the learning rate below this value.
    - min_mom (float): The decay never pushes the momentum below this value.
    - optimizer (str): Which optimizer to use: "SGD", "Adam" or "RMSprop".
    - num_epochs (int, optional): Maximum number of training epochs (default: 1000).
    - plot_curves (bool, optional): Whether to plot training curves (default: False).

    Returns:
    - tuple: (model, number of epochs run, final training loss, final test loss, final training MEE,
      final test MEE, final training MAE, final test MAE, final training MSE, final test MSE).

    Raises:
    - ValueError: If the optimizer name is not recognised or num_epochs is smaller than 1.

    Training stops early when the training MEE has not improved by more than 1e-3 for 20 consecutive epochs.
    The model is moved to the module-level `device`; the data tensors are assumed to already live there.
    """

    if num_epochs < 1:
        raise ValueError("num_epochs must be at least 1")

    # One width per layer: the layer's share of the total neuron budget (layers without a share stay at 0)
    hidden_size = [0] * num_layers
    for i, proportion in enumerate(proportions):
        hidden_size[i] = int(proportion * neuron_number)

    # Create an instance of the model
    model = RegressorNNVar(hidden_size, activation_function, num_layers, dropout)
    model.to(device)

    # Define the loss function and the optimizer
    criterion = nn.MSELoss()
    if optimizer == "SGD":
        optim_instance = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum, weight_decay=reg_coeff)
    elif optimizer == "Adam":
        optim_instance = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=reg_coeff)
    elif optimizer == "RMSprop":
        optim_instance = optim.RMSprop(model.parameters(), lr=learning_rate, momentum=momentum, weight_decay=reg_coeff)
    else:
        # Fail here with a clear message instead of later on an unusable optimizer
        raise ValueError(f"Unknown optimizer {optimizer!r}: expected 'SGD', 'Adam' or 'RMSprop'")

    # Wrap the training data so that it can be served in shuffled mini-batches
    train_dataset = torch.utils.data.TensorDataset(x_train, y_train)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True)

    # Per-epoch histories used for the plots
    train_losses, test_losses, train_mees, test_mees = [], [], [], []

    # Parameters to stop at training convergence
    min_mee = float("inf")
    patience_counter, patience = 0, 20

    # Training cycle
    for epoch in range(num_epochs):
        model.train()  # Set the model to training mode

        for inputs, labels in train_dataloader:
            outputs = model(inputs)  # Forward pass
            loss = criterion(outputs, labels)  # Compute the loss

            optim_instance.zero_grad()  # Zero the gradients
            loss.backward()  # Backward pass
            optim_instance.step()  # Update weights

        # Decay once every epoch_decay completed epochs (epoch is zero-based, hence the +1)
        if epoch_decay and (epoch + 1) % epoch_decay == 0:
            _decay_optimizer_params(optim_instance, decay, mom_decay, min_lr, min_mom)

        model.eval()  # Set the model to evaluation mode

        # No gradients are needed for evaluation, so neither pass builds an autograd graph
        with torch.no_grad():
            # Evaluation on the training set
            y_pred = model(x_train)
            train_loss = criterion(y_pred, y_train)
            train_mee, train_mae, train_mse = metrics(y_pred, y_train)

            # Evaluation on the test/validation set
            test_outputs = model(x_test)
            test_loss = criterion(test_outputs, y_test)
            test_mee, test_mae, test_mse = metrics(test_outputs, y_test)

        train_losses.append(train_loss.item())
        train_mees.append(train_mee)
        test_losses.append(test_loss.item())
        test_mees.append(test_mee)

        print(f'Training - Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss.item():.4f}, '
            f'MEE: {train_mee:.4f} | Test - Epoch [{epoch+1}/{num_epochs}], '
            f'Loss: {test_loss.item():.4f} MEE: {test_mee:.4f} ', end='\r')

        # Check for convergence: only an improvement larger than 1e-3 resets the patience counter
        if min_mee - train_mee > 1e-3:
            patience_counter = 0
            min_mee = train_mee
        else:
            patience_counter += 1

        if patience_counter == patience:
            break

    print(f'\n N. Epochs = {epoch+1} - Loss (train | test)= ({train_loss.item():.4} | {test_loss.item():.4}) - MEE (train | test) = ({train_mee} | {test_mee}) - MAE (train | test) = ({train_mae} | {test_mae}) - MSE (train | test) = ({train_mse} | {test_mse})')

    if plot_curves:
        hyperparams = [hidden_size, learning_rate, momentum, batch_size, reg_coeff, activation_function, num_layers, dropout, decay, mom_decay, epoch_decay, num_epochs]
        plot_training_curves(epoch, train_losses, test_losses, train_mees, test_mees, hyperparams)

    return model, epoch+1, train_loss.item(), test_loss.item(), train_mee, test_mee, train_mae, test_mae, train_mse, test_mse

In [None]:
def perform_grid_search_kfold(proportionss, neuron_numbers, learning_rates, momentums, batch_sizes, reg_coeffs,
                              activations, layerss, dropouts, decays, mom_decays, epoch_decays, min_lrs, min_moms, optimiz,
                              k_folds, x, y, plot_curves=False, num_epochs=1000, N=1):

    """
    Perform grid search with k-fold cross-validation for hyperparameters.

    Parameters:
    - proportionss (list): List of vectors that define how many neurons each layer should contain in proportion to total neurons.
    - neuron_numbers (list): List of neuron numbers to search.
    - learning_rates (list): List of learning rates to search.
    - momentums (list): List of momentum values to search.
    - batch_sizes (list): List of batch sizes to search.
    - reg_coeffs (list): List of regularization coefficients to search.
    - activations (list): List of activation functions to search.
    - layerss (list): List of numbers of hidden layers to search.
    - dropouts (list): List of dropout probabilities to search.
    - decays (list): List of decay factors for learning rate.
    - mom_decays (list): List of decay factors for momentum.
    - epoch_decays (list): List of epochs that define when to decay the parameters.
    - min_lrs (list): List of minimum values that learning rate can assume.
    - min_moms (list): List of minimum values that momentum can assume.
    - optimiz (string): Which optimizer to use.
    - k_folds (int): Number of folds for cross-validation.
    - x: Input data, indexable by the index arrays produced by KFold.
    - y: Target data, indexable in the same way.
    - plot_curves (bool, optional): Whether to plot training curves (default: False).
    - num_epochs (int, optional): Number of training epochs (default: 1000).
    - N (int): Number of times to train the model with different initializations on each fold (default: 1).

    Returns:
    - list: Hyperparameters of the combination with the lowest mean validation MEE, ordered as
      [proportions, neuron_number, learning_rate, momentum, batch_size, reg_coeff, activation, layers,
      dropout, decay, mom_decay, epoch_decay, min_lr, min_mom]. Empty if no combination was selected.

    Every combination of the grids is trained N times on each of the k folds with training_model;
    the per-combination means and standard deviations are printed, followed by the best
    combinations according to validation MEE and validation loss.
    """

    best_mee = float('inf')  # best mean validation MEE seen so far
    best_loss = float('inf')  # best mean validation loss seen so far
    # Defaults so that the final report still works when no combination is ever selected
    best_hyperparams = []
    best_mee_out = ""
    best_loss_out = ""
    finale = "\n----- RISULTATI -----\n"

    # The same tuple drives both the model count and the product below, so the two cannot drift apart
    grids = (proportionss, neuron_numbers, learning_rates, momentums, batch_sizes, reg_coeffs, activations,
             layerss, decays, mom_decays, epoch_decays, min_lrs, min_moms, dropouts)

    # Counting the total of models the grid search will train
    count = 1
    tot_modelli = 1
    for grid in grids:
        tot_modelli *= len(grid)

    tot_iniz = k_folds*N

    # Fixed seed: every combination is evaluated on the same folds
    kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)

    # Pick out each parameter set from the grid search possible combinations
    for (proportions, neuron_number, learning_rate, momentum, batch_size, reg_coeff, activation, layers,
         decay, mom_decay, epoch_decay, min_lr, min_mom, dropout) in product(*grids):
        print("\nModello "+str(count)+"\\"+str(tot_modelli))
        print(f'activation={activation}; layers={layers}; proportions={proportions}; neuron_number={neuron_number}; lr={learning_rate}; alpha = {momentum}; batch size = {batch_size}; lambda = {reg_coeff}; optim = {optimiz}; decay = {decay}; epoch decay = {epoch_decay}; minimum learning rate = {min_lr}; minimum momentum = {min_mom}')

        iniz = 1

        # Lists to store training and validation losses, MEEs, MAEs, MSEs and maximum epochs for each initialization
        train_losses, val_losses, train_mees, val_mees, train_maes, val_maes, train_mses, val_mses, max_epochs = [], [], [], [], [], [], [], [], []

        # Perform K-fold cross-validation
        for train_indices, val_indices in kf.split(x, y):

            # Split the data into training and validation sets
            X_train, X_val = x[train_indices], x[val_indices]
            Y_train, Y_val = y[train_indices], y[val_indices]

            # Repeated trainings on the same fold
            for _ in range(N):

                print("\nInizializzazione "+str(iniz)+"\\"+str(tot_iniz))
                _, max_epoch, train_loss, val_loss, train_mee, val_mee, train_mae, val_mae, train_mse, val_mse = training_model(
                    x_train=X_train, y_train=Y_train, x_test=X_val, y_test=Y_val, proportions=proportions, neuron_number=neuron_number,
                    learning_rate=learning_rate, momentum=momentum, batch_size=batch_size, reg_coeff=reg_coeff, activation_function=activation,
                    num_layers=layers, dropout=dropout, decay=decay, mom_decay=mom_decay, epoch_decay=epoch_decay, min_lr=min_lr, min_mom=min_mom,
                    optimizer=optimiz, num_epochs=num_epochs, plot_curves=plot_curves)

                # Saving training results
                train_losses.append(train_loss)
                val_losses.append(val_loss)
                train_mees.append(train_mee)
                val_mees.append(val_mee)
                train_maes.append(train_mae)
                val_maes.append(val_mae)
                train_mses.append(train_mse)
                val_mses.append(val_mse)
                max_epochs.append(max_epoch)
                iniz += 1

        out = (
            f'Final Results: activation={activation}; layers={layers}; proportions = {proportions}; neuron number={neuron_number}; '
            f'lr={learning_rate}; alpha = {momentum}; batch size = {batch_size}; lambda = {reg_coeff}; optim = {optimiz}; '
            f'learning rate decay = {decay}; momentum decay = {mom_decay}; epoch decay = {epoch_decay}; '
            f'minimum learning rate = {min_lr}; minimum momentum = {min_mom} \n'
            f'--> train_loss = {np.mean(train_losses):.4} +- {np.std(train_losses):.4} | val_loss = {np.mean(val_losses):.4} +- {np.std(val_losses):.4} \n'
            f'train_mee = {np.mean(train_mees):.4} +- {np.std(train_mees):.4} | val_mee = {np.mean(val_mees):.4} +- {np.std(val_mees):.4} \n'
            f'train_mae = {np.mean(train_maes):.4} +- {np.std(train_maes):.4} | val_mae = {np.mean(val_maes):.4} +- {np.std(val_maes):.4} \n'
            f'train_mse = {np.mean(train_mses):.4} +- {np.std(train_mses):.4} | val_mse = {np.mean(val_mses):.4} +- {np.std(val_mses):.4}\n'
            f'mean max epoch = {round(np.mean(max_epochs))}\n\n'
        )
        count += 1
        finale += out

        # Checking out the best models for MEE and Loss
        mean_val_mee = np.mean(val_mees)
        if mean_val_mee < best_mee:
            best_mee_out = out
            best_mee = mean_val_mee
            best_hyperparams = [proportions, neuron_number, learning_rate, momentum, batch_size, reg_coeff, activation, layers, dropout, decay, mom_decay, epoch_decay, min_lr, min_mom]

        mean_val_loss = np.mean(val_losses)
        if mean_val_loss < best_loss:
            best_loss_out = out
            best_loss = mean_val_loss

    finale = finale+"\n---- MIGLIORI RISULTATI MEE ----\n"+best_mee_out+"\n---- MIGLIORI RISULTATI LOSS ----\n"+best_loss_out
    print(finale)
    print(best_hyperparams)
    return best_hyperparams

In [None]:
def train_with_different_initializations(x_train, y_train, x_test, y_test, proportions, neuron_number, learning_rate,
                                         momentum, batch_size, reg_coeff, activation, layers, dropout, decay, mom_decay,
                                         epoch_decay, min_lr, min_mom, optimiz, max_num_epochs=1000, plot_curves=False, N=5):

    """
    Train the model N times from scratch and report the mean and spread of its final metrics.

    Parameters:
    - x_train (torch.Tensor): Training input data.
    - y_train (torch.Tensor): Training target data.
    - x_test (torch.Tensor): Test input data.
    - y_test (torch.Tensor): Test target data.
    - proportions (list of floats): Fraction of the total neurons assigned to each layer.
    - neuron_number (int): Total number of neurons across all layers.
    - learning_rate (float): Learning rate for the optimizer.
    - momentum (float): Momentum for the optimizer.
    - batch_size (int): Batch size for training.
    - reg_coeff (float): Regularization coefficient for weight decay.
    - activation (torch.nn.Module): Activation function for the model.
    - layers (int): Number of hidden layers in the model.
    - dropout (float): Dropout value forwarded to training_model.
    - decay (float): Decay factor for learning rate.
    - mom_decay (float): Decay factor for momentum.
    - epoch_decay (int): Number of epochs between decay steps.
    - min_lr (float): Minimum value that learning rate can assume.
    - min_mom (float): Minimum value that momentum can assume.
    - optimiz (string): Which optimizer to use.
    - max_num_epochs (int, optional): Maximum number of training epochs per run (default: 1000).
    - plot_curves (bool, optional): Whether to plot training curves (default: False).
    - N (int, optional): Number of independent trainings (default: 5).

    Returns:
    None

    Prints the hyperparameters once, then the mean and standard deviation over the N runs of the
    final train/test loss, MEE, MAE and MSE.
    """

    # Report labels, in the same order as the metric values returned by training_model
    labels = ('Train Loss', 'Test Loss', 'Train MEE', 'Test MEE', 'Train MAE', 'Test MAE', 'Train MSE', 'Test MSE')
    collected = {label: [] for label in labels}

    print(f'activation={activation}; layers={layers}; proportions={proportions}; neuron_number={neuron_number}; lr={learning_rate}; alpha = {momentum}; batch size = {batch_size}; lambda = {reg_coeff}; optim = {optimiz}; learning rate decay = {decay}; momentum decay = {mom_decay}; epoch decay = {epoch_decay}; minimum learning rate = {min_lr}; minimum momentum = {min_mom}\n')

    # Independent trainings
    for _ in range(N):
        # The first two returned items (model and epoch count) are not needed here
        _, _, *final_scores = training_model(
            x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test,
            proportions=proportions, neuron_number=neuron_number, learning_rate=learning_rate,
            momentum=momentum, batch_size=batch_size, reg_coeff=reg_coeff,
            activation_function=activation, num_layers=layers, dropout=dropout,
            decay=decay, mom_decay=mom_decay, epoch_decay=epoch_decay,
            min_lr=min_lr, min_mom=min_mom, optimizer=optimiz,
            num_epochs=max_num_epochs, plot_curves=plot_curves)

        # Store each final score under its report label
        for label, score in zip(labels, final_scores):
            collected[label].append(score)

    # Summary: mean +- standard deviation per metric
    for label in labels:
        scores = collected[label]
        print(f'{label}: {np.mean(scores):.4} +- {np.std(scores):.4}')


### Auxiliary functions

In [None]:
def metrics(y_pred, y_true):

    """
    Compute the Mean Euclidean Error, Mean Absolute Error and Mean Squared Error between two sets of 3D vectors.

    Parameters:
    - y_pred: PyTorch tensor of size (N, 3) with the predicted vectors
    - y_true: PyTorch tensor of size (N, 3) with the target vectors

    Returns:
    - tuple of three Python floats: (mean Euclidean error, mean absolute error, mean squared error)

    Raises:
    - ValueError: if either tensor does not have 3 columns or the two row counts differ
    """

    # Both tensors must be (N, 3) with the same N
    shapes_ok = (
        y_pred.shape[1] == 3
        and y_true.shape[1] == 3
        and y_pred.shape[0] == y_true.shape[0]
    )
    if not shapes_ok:
        raise ValueError("Input tensors must be of size (N, 3)")

    # Mean Euclidean Error: per-row norm of the difference, averaged over the rows
    mee = (y_pred - y_true).norm(dim=1).mean()

    # Mean Absolute Error and Mean Squared Error, averaged over all elements
    mae = F.l1_loss(y_pred, y_true)
    mse = F.mse_loss(y_pred, y_true)

    return mee.item(), mae.item(), mse.item()

In [None]:
def plot_training_curves(epoch, train_losses, test_losses, train_mees, test_mees, hyperparams):

    """
    Draw a 2x2 figure with the training and test curves for loss and Mean Euclidean Error (MEE).

    Parameters:
    - epoch (int): Zero-based index of the last epoch run; the top-row curves are plotted against
      epochs 1 .. epoch + 1, so every list is expected to hold epoch + 1 values.
    - train_losses (list): Training loss for each epoch.
    - test_losses (list): Test loss for each epoch.
    - train_mees (list): Training MEE for each epoch.
    - test_mees (list): Test MEE for each epoch.
    - hyperparams (list): Hyperparameters shown in the figure title; entries 0 to 6 are read
      (hidden units, eta, alpha, batch size, lambda, activation function, layers).

    Returns:
    None

    Panels:
    1. Training and test loss curves.
    2. Training and test MEE curves.
    3. The loss curves again, plotted against the list index, with the y-axis limited to [0, 10].
    4. The MEE curves again, plotted against the list index, with the y-axis limited to [0, 10].
    """

    plt.figure(figsize=(12, 8))
    plt.suptitle(f'Batch Size={hyperparams[3]},Activation Function={hyperparams[5]}, Layers={hyperparams[6]} Hidden Units={hyperparams[0]}, Eta={hyperparams[1]}, Alpha={hyperparams[2]}, Lambda={hyperparams[4]}')

    # Epoch numbers (starting at 1) used on the x-axis of the two full-range panels
    epoch_axis = range(1, epoch + 2)

    # One row per panel:
    # (subplot index, train series, test series, train label, test label, y-axis label, zoomed?)
    panels = (
        (1, train_losses, test_losses, 'Training Loss', 'Test Loss', 'Loss', False),
        (2, train_mees, test_mees, 'Training MEE', 'Test MEE', 'MEE', False),
        (3, train_losses, test_losses, 'Training Loss', 'Validation Loss', 'MSE', True),
        (4, train_mees, test_mees, 'Training MEE', 'Validation MEE', 'MEE', True),
    )

    for position, train_curve, test_curve, train_label, test_label, y_label, zoomed in panels:
        plt.subplot(2, 2, position)

        # Zoomed panels let matplotlib use the list index as x; full panels use the epoch numbers
        x_args = () if zoomed else (epoch_axis,)
        plt.plot(*x_args, train_curve, label=train_label, color='red')
        plt.plot(*x_args, test_curve, label=test_label, color='blue', linestyle='--')

        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        if zoomed:
            plt.ylim(0, 10)
        plt.legend()

    plt.tight_layout()

    plt.show()

### Preprocessing dataset

In [None]:
# Load the dataset: columns 1-10 are the input variables (X), columns 11-13 the output variables (y);
# column 0 is not used.
dataset = np.loadtxt('ML-CUP23-TR.csv', delimiter=',')

# Convert both parts to float32 tensors and move them to the selected device
X = torch.tensor(dataset[:, 1:11], dtype=torch.float32).to(device)
y = torch.tensor(dataset[:, 11:14], dtype=torch.float32).to(device)

# Split the data into training and testing sets (80%/20%), with a fixed seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Grid searches

### Experiments with Adam

In [None]:
'''
#Performing the first grid search with decay related parameters
proportionss = [[1/3, 1/3, 1/3]]
hidden_neurons = [1000]
learning_rates = [1e-04, 3e-04]
momentums = [0]
batch_sizes = [128]
reg_coeffs = [1e-3]
activations = [nn.Tanh()]
layerss = [3]
dropouts = [0]
decays = [0.9, 0.95]
mom_decays = [1]
epoch_decays = [100, 150, 200]
min_lrs = [1e-09, 1e-08, 1e-07]
min_moms = [0]
optimiz = 'Adam'


# Finer grid serach
proportionss = [[1/3, 1/3, 1/3]]
hidden_neurons = [1000]
learning_rates = [1e-04, 2e-04]
momentums = [0]
batch_sizes = [128]
reg_coeffs = [1e-3]
activations = [nn.Tanh()]
layerss = [3]
dropouts = [0]
decays = [0.9, 0.85, 0.8]
mom_decays = [1]
epoch_decays = [200, 250, 300]
min_lrs = [1e-07, 5e-08]
min_moms = [0]
optimiz = 'Adam'
'''


# Run the grid search (3-fold cross-validation, 2 initializations per fold) on the training split.
# NOTE(review): the grid variables passed below (proportionss, hidden_neurons, ..., optimiz) are
# assigned only inside the triple-quoted string above, which Python evaluates as a plain string
# and discards. Move the grid to run out of that string before executing this call; otherwise
# the names are undefined here unless an earlier cell already defined them.
best_hp = perform_grid_search_kfold(
    proportionss=proportionss,
    neuron_numbers=hidden_neurons,
    learning_rates=learning_rates,
    momentums=momentums,
    batch_sizes=batch_sizes,
    reg_coeffs=reg_coeffs,
    activations=activations,
    layerss=layerss,
    dropouts=dropouts,
    decays=decays,
    mom_decays=mom_decays,
    epoch_decays=epoch_decays,
    min_lrs=min_lrs,
    min_moms=min_moms,
    optimiz=optimiz,
    k_folds=3,
    x=X_train,
    y=y_train,
    num_epochs=5000,
    plot_curves=False,
    N=2,
)

### Experiments with SGD

In [None]:
'''
#Performing the first grid search with decay related parameters
proportionss = [[1/3, 1/3, 1/3]]
hidden_neurons = [1000]
learning_rates = [1e-04, 3e-04]
momentums = [0.9, 0.8]
batch_sizes = [128]
reg_coeffs = [1e-3]
activations = [nn.Tanh()]
layerss = [3]
dropouts = [0]
decays = [0.9, 0.95]
mom_decays = [0.9, 0.8, 1]
epoch_decays = [100, 150]
min_lrs = [1e-09, 1e-08]
min_moms = [0.5, 0.3]
optimiz = 'SGD'


# Finer grid serach
proportionss = [[1/3, 1/3, 1/3]]
hidden_neurons = [1000]
learning_rates = [2e-04, 3e-04]
momentums = [0.9, 0.85]
batch_sizes = [128]
reg_coeffs = [1e-3]
activations = [nn.Tanh()]
layerss = [3]
dropouts = [0]
decays = [0.95]
mom_decays = [1]
epoch_decays = [150, 200]
min_lrs = [1e-8, 5e-9, 5e-8]
min_moms = [0.3, 0.2]
optimiz = 'SGD'
'''


# Run the grid search (3-fold cross-validation, 2 initializations per fold) on the training split.
# NOTE(review): the grid variables passed below (proportionss, hidden_neurons, ..., optimiz) are
# assigned only inside the triple-quoted string above, which Python evaluates as a plain string
# and discards. Move the grid to run out of that string before executing this call; otherwise
# the names are undefined here, or still hold the values left by a previously executed cell.
best_hp = perform_grid_search_kfold(
    proportionss=proportionss,
    neuron_numbers=hidden_neurons,
    learning_rates=learning_rates,
    momentums=momentums,
    batch_sizes=batch_sizes,
    reg_coeffs=reg_coeffs,
    activations=activations,
    layerss=layerss,
    dropouts=dropouts,
    decays=decays,
    mom_decays=mom_decays,
    epoch_decays=epoch_decays,
    min_lrs=min_lrs,
    min_moms=min_moms,
    optimiz=optimiz,
    k_folds=3,
    x=X_train,
    y=y_train,
    num_epochs=5000,
    plot_curves=False,
    N=2,
)

### Experiments with RMSprop

In [None]:
'''
#Performing the first grid search with decay related parameters
proportionss = [[1/3, 1/3, 1/3]]
hidden_neurons = [1000]
learning_rates = [1e-5, 1e-04]
momentums = [0.9, 0.8]
batch_sizes = [128]
reg_coeffs = [1e-3]
activations = [nn.Tanh()]
layerss = [3]
dropouts = [0]
decays = [0.9, 0.95]
mom_decays = [0.9, 0.8, 1]
epoch_decays = [100, 200, 400]
min_lrs = [1e-09, 1e-08]
min_moms = [0.5, 0.3]
optimiz = 'RMSprop'



# Finer grid serach
proportionss = [[1/3, 1/3, 1/3]]
hidden_neurons = [1000]
learning_rates = [1e-05, 3e-05]
momentums = [0.9, 0.85]
batch_sizes = [128]
reg_coeffs = [1e-3]
activations = [nn.Tanh()]
layerss = [3]
dropouts = [0]
decays = [0.9, 0.85]
mom_decays = [0.8, 0.85]
epoch_decays = [200, 300]
min_lrs = [1e-09, 5e-09, 5e-10]
min_moms = [0.4, 0.5, 0.6]
optimiz = 'RMSprop'




# See the difference with and without momentum decay
proportionss = [[1/3, 1/3, 1/3]]
hidden_neurons = [1000]
learning_rates = [1e-05]
momentums = [0.9]
batch_sizes = [128]
reg_coeffs = [1e-3]
activations = [nn.Tanh()]
layerss = [3]
dropouts = [0]
decays = [0.85]
mom_decays = [0.8, 1]
epoch_decays = [200]
min_lrs = [5e-10]
min_moms = [0.6]
optimiz = 'RMSprop'


'''


# Run the grid search (3-fold cross-validation, 2 initializations per fold) on the training split.
# NOTE(review): the grid variables passed below (proportionss, hidden_neurons, ..., optimiz) are
# assigned only inside the triple-quoted string above, which Python evaluates as a plain string
# and discards. Move the grid to run out of that string before executing this call; otherwise
# the names are undefined here, or still hold the values left by a previously executed cell.
best_hp = perform_grid_search_kfold(
    proportionss=proportionss,
    neuron_numbers=hidden_neurons,
    learning_rates=learning_rates,
    momentums=momentums,
    batch_sizes=batch_sizes,
    reg_coeffs=reg_coeffs,
    activations=activations,
    layerss=layerss,
    dropouts=dropouts,
    decays=decays,
    mom_decays=mom_decays,
    epoch_decays=epoch_decays,
    min_lrs=min_lrs,
    min_moms=min_moms,
    optimiz=optimiz,
    k_folds=3,
    x=X_train,
    y=y_train,
    num_epochs=5000,
    plot_curves=False,
    N=2,
)

In [None]:
# Play the alarm sound to signal that the preceding grid searches have finished
play_sound()