In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from matplotlib import pyplot as plt
from sklearn.model_selection import KFold, GridSearchCV
from itertools import product
from sklearn.metrics import mean_squared_error

In [3]:
 # load the dataset, split into input (X) and output (y) variables
dataset = np.loadtxt('ML-CUP23-TR.csv', delimiter=',')
X = dataset[:,1:11]
y = dataset[:,11:14]

X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32)

In [21]:
# Define a simple regression neural network
class RegressorNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RegressorNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

# Set hyperparameters
input_size = 10
output_size = 3

hidden_sizes = [30]
learning_rates = [0.01,0.001]
l2_coefficients = [1e-4]

epochs = 300

# Define K-fold cross-validation (external cycle)
k_ext = 5
kf_ext = KFold(n_splits=k_ext, shuffle=True, random_state=42)

# Lists to store test loss for each external fold
test_losses = []

# Perform K-fold cross-validation
for fold_ext, (train_indices_ext, val_indices_ext) in enumerate(kf_ext.split(X)):
    #print(f"\nFold {fold + 1}/{k_folds}")

    # Split the data into training and validation sets
    X_tr_ext, X_test = X[train_indices_ext], X[val_indices_ext]
    y_tr_ext, y_test = y[train_indices_ext], y[val_indices_ext]

    best_hyperparams = []
    best_loss = 1e3

    for hidden_size, learning_rate, l2_coefficient in product(hidden_sizes,learning_rates,l2_coefficients):
        # Define the model, loss function, and optimizer
        model = RegressorNN(input_size, hidden_size, output_size)
        criterion = nn.MSELoss()
        optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=l2_coefficient)

        # Define K-fold cross-validation
        k_folds = 5
        kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)

        # Lists to store training and validation loss for each epoch
        train_losses = []
        val_losses = []
        # Perform K-fold cross-validation
        for fold, (train_indices, val_indices) in enumerate(kf.split(X_tr_ext)):
            #print(f"\nFold {fold + 1}/{k_folds}")

            # Split the data into training and validation sets
            X_train, X_val = X_tr_ext[train_indices], X_tr_ext[val_indices]
            y_train, y_val = y_tr_ext[train_indices], y_tr_ext[val_indices]

            # Training loop
            for epoch in range(epochs):
                # Forward pass
                outputs = model(X_train)
                loss = criterion(outputs, y_train)

                # Backward pass and optimization
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                #if (epoch + 1) % 100 == 0:
                #    print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.4f}")

            # Save training loss for plotting
            train_losses.append(loss.item())

            # Validation
            with torch.no_grad():
                model.eval()
                val_outputs = model(X_val)
                val_loss = criterion(val_outputs, y_val)
                #mse = mean_squared_error(y_val.numpy(), val_outputs.numpy())
                #print(f"Validation MSE: {mse:.4f}")
                #print(f"Validation Loss: {val_loss.item():.4f}")

                # Save validation loss for plotting
                val_losses.append(val_loss.item())

            model.train()  # Set the model back to training mode

        print(f'hidden_size={hidden_size}; lr={learning_rate}; lambda = {l2_coefficient} --> '
            f'train_loss = {np.mean(train_losses):.4} +- {np.std(train_losses):.4} | '
            f'val_loss = {np.mean(val_losses):.4} +- {np.std(val_losses):.4}')

        if np.mean(val_losses) < best_loss:
            best_loss = np.mean(val_losses)
            best_hyperparams = [hidden_size, learning_rate, l2_coefficient]

    print(f'fold {fold_ext+1}, loss = {best_loss:.4} with best hyperparams: {best_hyperparams}\n')
    test_losses.append(best_loss)

print(f'\ntest losses: {test_losses}')
print(f'final score: {np.mean(test_losses)} +- {np.std(test_losses)}')

hidden_size=30; lr=0.01; lambda = 0.0001 --> train_loss = 3.417 +- 1.076 | val_loss = 3.81 +- 1.454
hidden_size=30; lr=0.001; lambda = 0.0001 --> train_loss = 11.42 +- 5.498 | val_loss = 11.97 +- 6.991
fold 1, loss = 3.81 with best hyperparams: [30, 0.01, 0.0001]

hidden_size=30; lr=0.01; lambda = 0.0001 --> train_loss = 3.338 +- 1.333 | val_loss = 3.75 +- 1.329
hidden_size=30; lr=0.001; lambda = 0.0001 --> train_loss = 12.65 +- 5.827 | val_loss = 12.82 +- 5.741
fold 2, loss = 3.75 with best hyperparams: [30, 0.01, 0.0001]

hidden_size=30; lr=0.01; lambda = 0.0001 --> train_loss = 3.075 +- 1.012 | val_loss = 3.263 +- 1.053
hidden_size=30; lr=0.001; lambda = 0.0001 --> train_loss = 11.65 +- 5.803 | val_loss = 11.79 +- 5.297
fold 3, loss = 3.263 with best hyperparams: [30, 0.01, 0.0001]

hidden_size=30; lr=0.01; lambda = 0.0001 --> train_loss = 3.313 +- 1.149 | val_loss = 3.585 +- 1.218
hidden_size=30; lr=0.001; lambda = 0.0001 --> train_loss = 12.36 +- 5.726 | val_loss = 13.18 +- 6.561
