In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
def load_data(path: str) -> tuple[torch.Tensor, torch.Tensor]:
  '''
    Load data from the specified path.
    path: str - file path
    return: X, y - the features and labels
  '''
  data = pd.read_csv(path)
  X = data.iloc[:, :-1].values
  y = data.iloc[:, -1].values
  y = LabelEncoder().fit_transform(y)
  X = torch.tensor(X, dtype=torch.float32)
  y = torch.tensor(y, dtype=torch.long)
  return X, y

In [3]:
X, y = load_data("sonar.csv")
print(X.shape, y.shape)
writer = SummaryWriter(log_dir='logs')

torch.Size([207, 60]) torch.Size([207])


In [4]:
# split data using 70/20/10 split ratio
X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.125, random_state=1)

print(f"x_train: {X_train.shape}, y_train: {y_train.shape}")
print(f"x_val: {X_val.shape}, y_val: {y_val.shape}")
print(f"x_test: {X_test.shape}, y_test: {y_test.shape}")

x_train: torch.Size([144, 60]), y_train: torch.Size([144])
x_val: torch.Size([21, 60]), y_val: torch.Size([21])
x_test: torch.Size([42, 60]), y_test: torch.Size([42])


In [5]:
def build_nn(architecture: int, bias: bool=True):
  '''
    Build a neural network model with the specified architecture
    architecture: int - the architecture of the model
    bias: bool - whether to include bias or not
    return: nn.Sequential - the model
  '''

  if architecture == 1: # 3 layers with ReLU activation function
   return nn.Sequential(
      nn.Linear(60, 30, bias=bias),
      nn.ReLU(),
      nn.Linear(30, 15, bias=bias),
      nn.ReLU(),
      nn.Linear(15, 2, bias=bias)
   )
  elif architecture == 2: # 5 layers with Tanh activation function
    return nn.Sequential(
      nn.Linear(60, 30, bias=bias),
      nn.Tanh(),
      nn.Linear(30, 15, bias=bias),
      nn.Tanh(),
      nn.Linear(15, 7, bias=bias),
      nn.Tanh(),
      nn.Linear(7, 3, bias=bias),
      nn.Tanh(),
      nn.Linear(3, 2, bias=bias)
    )
  elif architecture == 3: # 7 layers with Sigmoid activation function
    return nn.Sequential(
      nn.Linear(60, 50, bias=bias),
      nn.Sigmoid(),
      nn.Dropout(),
      nn.Linear(50, 40, bias=bias),
      nn.Sigmoid(),
      nn.Dropout(),
      nn.Linear(40, 30, bias=bias),
      nn.Sigmoid(),
      nn.Dropout(),
      nn.Linear(30, 20, bias=bias),
      nn.Sigmoid(),
      nn.Dropout(),
      nn.Linear(20, 10, bias=bias),
      nn.Sigmoid(),
      nn.Dropout(),
      nn.Linear(10, 5, bias=bias),
      nn.Sigmoid(),
      nn.Dropout(),
      nn.Linear(5, 2, bias=bias)
    )
  else: # 6 layers with Tanh activation function
    return nn.Sequential(
      nn.Linear(60, 50, bias=bias),
      nn.Tanh(),
      nn.Linear(50, 40, bias=bias),
      nn.Tanh(),
      nn.Linear(40, 20, bias=bias),
      nn.Tanh(),
      nn.Linear(20, 15, bias=bias),
      nn.Tanh(),
      nn.Linear(15, 10, bias=bias),
      nn.Tanh(),
      nn.Linear(10, 2, bias=bias)
    )

In [6]:
def train_loop(model, data_loader, loss_fn, optimizer):
  '''
    code from lecture 22
    Train the model using the training data.
    model: nn.Module - the model to train
    data_loader: DataLoader - the data loader for training data
    loss_fn: nn.CrossEntropyLoss - the loss function
    optimizer: optim - the optimizer
    return: float - the loss of the model
  '''
  model.train()
  epoch_loss = 0

  for batch, (X, y) in enumerate(data_loader):
    pred = model(X)
    loss = loss_fn(pred, y)

    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    epoch_loss += loss.item()
  return epoch_loss

def test_loop(model, data_loader, loss_fn):
  '''
    code from lecture 22
    Test the model using the validation data.
    model: nn.Module - the model to test
    data_loader: DataLoader - the data loader for validation data
    loss_fn: nn.CrossEntropyLoss - the loss function
    return: float - the loss of the model
  '''
  model.eval()
  test_loss = 0

  with torch.no_grad():
    for X, y in data_loader:
      pred = model(X)
      loss = loss_fn(pred, y)

      test_loss += loss.item()
  return test_loss


def calculate_accuracy(model, data_loader):
  '''
    Calculate the accuracy of the model.
    model: nn.Module - the model to test
    data_loader: DataLoader - the data loader for the data
    return: float - the accuracy score of the model
  '''
  model.eval()
  true_y = []
  pred_y = []

  with torch.no_grad():
    for X, y in data_loader:
      pred = model(X)
      _, predicted = torch.max(pred, 1)
      true_y.extend(y.tolist())
      pred_y.extend(predicted.tolist())
  return accuracy_score(true_y, pred_y)

In [7]:
def parameter_tuning(architecture: int, optimizers: list, learning_rates: list, epochs: int, batch_size: int, X_train, y_train, X_val, y_val):
  '''
    Finds the best parameters for a given model.
    architecture: int - the architecture of the model
    optimizers: list - the list of optimizers to use
    learning_rates: list - the list of learning rates to use
    epochs: int - the number of epochs to run
    batch_size: int - the batch size
    X_train: torch.Tensor - the training features
    y_train: torch.Tensor - the training labels
    X_val: torch.Tensor - the validation features
    y_val: torch.Tensor - the validation labels
    return: dict - dictionary that contains the best parameters
  '''
  best_loss = float('inf')
  best_params = None

  for opt in optimizers:
    for lr in learning_rates:
      for bs in batch_size:
        #print(f"Running with optimizer: {opt}, learning rate: {lr}, batch size: {bs}")
        model = build_nn(architecture)
        train_loader = DataLoader(list(zip(X_train, y_train)), batch_size=bs, shuffle=True)
        val_loader = DataLoader(list(zip(X_val, y_val)), batch_size=bs, shuffle=False)

        if opt == "SGD":
          optimizer = optim.SGD(model.parameters(), lr=lr)
        elif opt == "Adam":
          optimizer = optim.Adam(model.parameters(), lr=lr)
        elif opt == "RMSprop":
          optimizer = optim.RMSprop(model.parameters(), lr=lr)
        else:
          optimizer = optim.Adagrad(model.parameters(), lr=lr)

        loss_fn = nn.CrossEntropyLoss()

        for epoch in range(epochs):
          train_loss = train_loop(model, train_loader, loss_fn, optimizer)
          val_loss = test_loop(model, val_loader, loss_fn)

          train_accuracy = calculate_accuracy(model, train_loader)
          val_accuracy = calculate_accuracy(model, val_loader)

          writer.add_scalar(f"Loss/train_{opt}_{lr}_{bs}", train_loss, epoch)
          writer.add_scalar(f"Loss/test_{opt}_{lr}_{bs}", val_loss, epoch)
          writer.add_scalar(f"train_accuracy_{opt}_{lr}_{bs}", train_accuracy, epoch)
          writer.add_scalar(f"test_accuracy_{opt}_{lr}_{bs}", val_accuracy, epoch)

          if val_loss < best_loss:
            best_loss = val_loss
            best_params = {"lr": lr, "epoch": epoch, "bs": bs, "optimizer": opt, "accuracy": val_accuracy}
  return best_params

In [8]:
learning_rates = [0.001, 0.01, 0.1]
batch_sizes = [16, 32, 64]
epochs = 1000
optimizers = ["SGD", "Adam", "RMSprop", "Adagrad"]

In [9]:
# best parameters for each model
model_1_params = parameter_tuning(1, optimizers, learning_rates, epochs, batch_sizes, X_train, y_train, X_val, y_val)
model_2_params = parameter_tuning(2, optimizers, learning_rates, epochs, batch_sizes, X_train, y_train, X_val, y_val)
model_3_params = parameter_tuning(3, optimizers, learning_rates, epochs, batch_sizes, X_train, y_train, X_val, y_val)
model_4_params = parameter_tuning(4, optimizers, learning_rates, epochs, batch_sizes, X_train, y_train, X_val, y_val)

In [10]:
print(f"Model 1 parameters: {model_1_params}")
print(f"Model 2 parameters: {model_2_params}")
print(f"Model 3 parameters: {model_3_params}")
print(f"Model 4 parameters: {model_4_params}")

Model 1 parameters: {'lr': 0.01, 'epoch': 231, 'bs': 64, 'optimizer': 'RMSprop', 'accuracy': 0.8095238095238095}
Model 2 parameters: {'lr': 0.001, 'epoch': 764, 'bs': 64, 'optimizer': 'RMSprop', 'accuracy': 0.8571428571428571}
Model 3 parameters: {'lr': 0.01, 'epoch': 298, 'bs': 64, 'optimizer': 'RMSprop', 'accuracy': 0.8571428571428571}
Model 4 parameters: {'lr': 0.1, 'epoch': 645, 'bs': 64, 'optimizer': 'SGD', 'accuracy': 0.8571428571428571}


In [11]:
def test_model(architecture: int, best_params: dict, X_train, y_train, X_test, y_test):
  '''
    Evaluate the model with the best parameters using the test data
    architecture: int - the architecture of the model
    best_params: dict - the best parameters for the model
    X_train: torch.Tensor - the training features
    y_train: torch.Tensor - the training labels
    X_test: torch.Tensor - the test features
    y_test: torch.Tensor - the test labels
    return: nn.Module - the trained model
  '''
  model = build_nn(architecture)
  train_loader = DataLoader(list(zip(X_train, y_train)), batch_size=best_params["bs"], shuffle=True)
  test_loader = DataLoader(list(zip(X_test, y_test)), batch_size=best_params["bs"], shuffle=False)

  if best_params["optimizer"] == "SGD":
    optimizer = optim.SGD(model.parameters(), lr=best_params["lr"])
  elif best_params["optimizer"] == "Adam":
    optimizer = optim.Adam(model.parameters(), lr=best_params["lr"])
  elif best_params["optimizer"] == "RMSprop":
    optimizer = optim.RMSprop(model.parameters(), lr=best_params["lr"])
  else:
    optimizer = optim.Adagrad(model.parameters(), lr=best_params["lr"])

  loss_fn = nn.CrossEntropyLoss()

  for epoch in range(best_params["epoch"]):
    train_loss = train_loop(model, train_loader, loss_fn, optimizer)
    writer.add_scalar(f"Loss/train_{architecture}", train_loss, epoch)
  test_loss = test_loop(model, test_loader, loss_fn)
  test_accuracy = calculate_accuracy(model, test_loader)
  writer.add_scalar(f"Loss/test_{architecture}", test_loss, 0)
  writer.add_scalar(f"test_accuracy_{architecture}", test_accuracy, 0)
  print(f"Test loss for model {architecture}: {test_loss}, Test accuracy for model {architecture}: {test_accuracy}")

  return model, test_accuracy

In [12]:
model_1, model_1_accuracy = test_model(1, model_1_params, X_train, y_train, X_test, y_test)
model_2, model_2_accuracy = test_model(2, model_2_params, X_train, y_train, X_test, y_test)
model_3, model_3_accuracy = test_model(3, model_3_params, X_train, y_train, X_test, y_test)
model_4, model_4_accuracy = test_model(4, model_4_params, X_train, y_train, X_test, y_test)

Test loss for model 1: 0.5587701201438904, Test accuracy for model 1: 0.8095238095238095
Test loss for model 2: 0.6173585653305054, Test accuracy for model 2: 0.8095238095238095
Test loss for model 3: 0.6978033781051636, Test accuracy for model 3: 0.47619047619047616
Test loss for model 4: 0.3148244023323059, Test accuracy for model 4: 0.8809523809523809


In [13]:
accuracies = [model_1_accuracy, model_2_accuracy, model_3_accuracy, model_4_accuracy]
best_model = np.argmax(accuracies) + 1

In [14]:
print(f"The best model is model {best_model} with an accuracy of {accuracies[best_model - 1]}")

4
