In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from matplotlib import pyplot as plt
from sklearn.model_selection import KFold
from itertools import product

In [3]:
 # load the dataset, split into input (X) and output (y) variables
dataset = np.loadtxt('ML-CUP23-TR.csv', delimiter=',')
X = dataset[:,1:11]
y = dataset[:,11:14]

X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32)

In [4]:
# Define a simple regression neural network
class RegressorNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RegressorNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x


In [56]:

# Set hyperparameters
input_size = 10
output_size = 3

hidden_sizes = [30]
learning_rates = [0.01]
l2_coefficients = [1e-2]

epochs = 2000 #max limit

# Define K-fold cross-validation (external cycle)
k_ext = 5
kf_ext = KFold(n_splits=k_ext, shuffle=True, random_state=42)

# Lists to store test loss for each external fold
test_losses = []

# Perform K-fold cross-validation
for fold_ext, (train_indices_ext, val_indices_ext) in enumerate(kf_ext.split(X)):
    #print(f"\nFold {fold + 1}/{k_folds}")

    # Split the data into training and validation sets
    X_tr_ext, X_test = X[train_indices_ext], X[val_indices_ext]
    y_tr_ext, y_test = y[train_indices_ext], y[val_indices_ext]

    best_hyperparams = []
    best_loss = float('inf')

    for hidden_size, learning_rate, l2_coefficient in product(hidden_sizes,learning_rates,l2_coefficients):

        # Define K-fold cross-validation
        k_folds = 5
        kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)

        # Lists to store training and validation loss for each fold
        train_losses = []
        val_losses = []
        epoch_reached = []
        # Perform K-fold cross-validation
        for fold, (train_indices, val_indices) in enumerate(kf.split(X_tr_ext)):
            #print(f"\nFold {fold + 1}/{k_folds}")

            # Split the data into training and validation sets
            X_train, X_val = X_tr_ext[train_indices], X_tr_ext[val_indices]
            y_train, y_val = y_tr_ext[train_indices], y_tr_ext[val_indices]

            # Define the model, loss function, and optimizer
            model = RegressorNN(input_size, hidden_size, output_size)
            criterion = nn.MSELoss()
            optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=l2_coefficient)

            # Training loop
            prev_loss = float('inf')
            for epoch in range(epochs):
                # Forward pass
                outputs = model(X_train)
                loss = criterion(outputs, y_train)

                # Backward pass and optimization
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                #if (epoch + 1) % 10 == 0:
                #    print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.4f}")

                # Check for convergence
                if abs(prev_loss - loss.item()) < 1e-3:
                    #print(f"Convergence reached. at epoch {epoch} Stopping training.")
                    break

                prev_loss = loss.item()

            epoch_reached.append(epoch)

            # Save training loss
            train_losses.append(loss.item())

            # Validation
            with torch.no_grad():
                model.eval()
                val_outputs = model(X_val)
                val_loss = criterion(val_outputs, y_val)
                #mse = mean_squared_error(y_val.numpy(), val_outputs.numpy())
                #print(f"Validation MSE: {mse:.4f}")
                #print(f"Validation Loss: {val_loss.item():.4f}")

                # Save validation loss for plotting
                val_losses.append(val_loss.item())

            model.train()  # Set the model back to training mode
        print(f'hidden_size={hidden_size}; lr={learning_rate}; lambda = {l2_coefficient} (max epoch reached: {epoch_reached})--> '
            f'train_loss = {np.mean(train_losses):.4} +- {np.std(train_losses):.4} | '
            f'val_loss = {np.mean(val_losses):.4} +- {np.std(val_losses):.4}')

        if np.mean(val_losses) < best_loss:
            best_loss = np.mean(val_losses)
            best_hyperparams = [hidden_size, learning_rate, l2_coefficient]

    print(f'fold {fold_ext+1}, loss = {best_loss:.4} with best hyperparams: {best_hyperparams}\n')
    test_losses.append(best_loss)

print(f'\ntest losses: {test_losses}')
print(f'final score: {np.mean(test_losses)} +- {np.std(test_losses)}')

hidden_size=30; lr=0.01; lambda = 0.01 (max epoch reached: [1233, 1292, 1394, 1199, 1210])--> train_loss = 2.273 +- 0.1559 | val_loss = 2.545 +- 0.4523
fold 1, loss = 2.545 with best hyperparams: [30, 0.01, 0.01]

hidden_size=30; lr=0.01; lambda = 0.01 (max epoch reached: [1270, 1362, 1172, 1385, 1319])--> train_loss = 2.362 +- 0.1011 | val_loss = 2.761 +- 0.4947
fold 2, loss = 2.761 with best hyperparams: [30, 0.01, 0.01]

hidden_size=30; lr=0.01; lambda = 0.01 (max epoch reached: [1211, 1370, 1400, 1383, 1284])--> train_loss = 2.3 +- 0.1074 | val_loss = 2.722 +- 0.3759
fold 3, loss = 2.722 with best hyperparams: [30, 0.01, 0.01]



KeyboardInterrupt: 

In [49]:
def init_weights(m):
    if isinstance(m, nn.Linear):
        torch.nn.init.uniform_(m.weight)
        m.bias.data.fill_(0.01)

model = RegressorNN(10,10,10)
#model.apply(init_weights)


# Access the model's state_dict
model_state_dict = model.state_dict()

# Access the weights of a specific layer (e.g., fc1)
fc1_weights = model_state_dict['fc1.weight']
fc1_biases = model_state_dict['fc1.bias']

# Access the weights of another layer (e.g., fc2)
fc2_weights = model_state_dict['fc2.weight']
fc2_biases = model_state_dict['fc2.bias']

# Print the shapes of the weights and biases
print("fc1_weights shape:", fc1_weights.shape)
print("fc1_biases shape:", fc1_biases.shape)
print("fc2_weights shape:", fc2_weights.shape)
print("fc2_biases shape:", fc2_biases.shape)

print(fc2_weights)


fc1_weights shape: torch.Size([10, 10])
fc1_biases shape: torch.Size([10])
fc2_weights shape: torch.Size([10, 10])
fc2_biases shape: torch.Size([10])
tensor([[-1.1876e-01,  1.6094e-01,  2.5509e-01,  2.6358e-01, -2.1193e-01,
         -1.8279e-01,  8.5015e-02,  9.2099e-02,  1.5224e-01,  2.6061e-01],
        [-2.2983e-01,  1.2229e-01, -2.2024e-01, -2.8799e-01, -1.3209e-01,
          1.5875e-01, -2.7715e-01, -2.7533e-01,  2.9598e-02, -2.2387e-01],
        [ 1.9596e-01,  4.4822e-02,  2.3118e-01, -1.3483e-01, -3.9399e-02,
         -2.6586e-01,  2.2417e-02, -6.1234e-02, -2.4896e-01,  3.0357e-01],
        [-2.8605e-01,  1.1999e-01,  1.1818e-01, -2.3773e-01,  1.3742e-01,
         -1.9452e-04, -2.5460e-01,  7.4685e-02,  7.7807e-03, -2.6752e-01],
        [-2.1724e-02,  2.9486e-01, -3.0922e-01,  1.3453e-02,  1.1495e-01,
         -1.8406e-01, -2.3747e-01,  8.5294e-02, -1.4132e-01,  1.6882e-01],
        [-2.6298e-01, -1.0811e-01, -9.7503e-03,  1.6967e-01, -1.7480e-01,
         -3.0791e-01,  1.3297e-