In [42]:
!pip install torch torchvision matplotlib torchinfo torchviz pandas numpy

import torch
import torchvision
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from typing import List, Tuple, Dict, Any

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

torch.manual_seed(45)
np.random.seed(45)

print("Libraries imported.")

Libraries imported.


# Preprocessing

In [43]:
train_df_ori = pd.read_csv('titanic_data/train.csv')
test_df_ori = pd.read_csv('titanic_data/test.csv')

train_df = train_df_ori.copy()
test_df = test_df_ori.copy()

In [44]:
train_df.drop(['PassengerId', 'Name'], axis=1, inplace=True)
test_df.drop(['Name'], axis=1, inplace=True)
test_df.shape

(418, 10)

In [45]:
missing_too_many = ['Cabin', 'Ticket']
train_df.drop(missing_too_many, axis=1, inplace=True)
test_df.drop(missing_too_many, axis=1, inplace=True)

In [46]:
# OHE
def imputer(df: pd.DataFrame, columns: list):
    # find columns that contain nans and impute them with the median values
    df_copy = df.copy()
    for col in columns:
        median = df_copy[col].median()
        df_copy[col] = df_copy[col].fillna(median, inplace=False)
    return df_copy

def PandasOneHotEncodeNumpy(df: pd.DataFrame, columns: list):
    df_copy = df.copy()
    for i, col in enumerate(columns):
        uniques = df_copy[col].unique()
        uniques.sort() # to account for differing orders items occur in test vs train
        for unique in uniques[1:]:
            name = f"{columns[i]}_{unique}"
            df_copy.insert(0, name, [0] * df_copy.shape[0], allow_duplicates=False)
            uniqueMap = {f"{unique}":1}
            df_copy[name] = df_copy[col].map(uniqueMap)
            df_copy[name] = df_copy[name].fillna(0, inplace=False)
    df_copy.drop(columns, axis=1, inplace=True)
    col = df_copy.columns
    return col, df_copy.to_numpy().astype("float32")

columns_train = ['Survived', 'Pclass', 'Age', 'SibSp', 'Parch', 'Fare']
columns_test = ['Pclass', 'Age', 'SibSp', 'Parch', 'Fare']

train_df['Embarked'] = train_df['Embarked'].fillna('S', inplace=False)
test_df['Embarked'] = test_df['Embarked'].fillna('S', inplace=False)

train_df = imputer(train_df, columns_train)
test_df = imputer(test_df, columns_test)

columns = ['Sex', 'Embarked']
# get all the unique values in the columns and 
train_col, train_df = PandasOneHotEncodeNumpy(train_df, columns)
test_col, test_df = PandasOneHotEncodeNumpy(test_df, columns)

In [47]:
test_col

Index(['Embarked_S', 'Embarked_Q', 'Sex_male', 'PassengerId', 'Pclass', 'Age',
       'SibSp', 'Parch', 'Fare'],
      dtype='object')

In [48]:
# Feature Scaling
# Standardization
# min-max scaling
def findI(key, cols):
    for i, val in enumerate(cols):
        if key == cols[i]:
            return i

def standardize(dp, cols_all, cols_target):
    dp_copy = dp
    for col in cols_target:
        column = dp_copy[:,findI(col, cols_all)]
        column = (column - column.min()) / (column.max() - column.min())
        dp_copy[:,findI(col, cols_all)] = column
    return dp_copy
        


train_df = standardize(train_df, train_col, ['Age', 'Fare'])
test_df = standardize(test_df, test_col, ['Age', 'Fare'])

In [49]:
train_col

Index(['Embarked_S', 'Embarked_Q', 'Sex_male', 'Survived', 'Pclass', 'Age',
       'SibSp', 'Parch', 'Fare'],
      dtype='object')

In [50]:
test_col

Index(['Embarked_S', 'Embarked_Q', 'Sex_male', 'PassengerId', 'Pclass', 'Age',
       'SibSp', 'Parch', 'Fare'],
      dtype='object')

In [51]:
# Convert to Tensor

In [52]:
class TitanicDataset:
    def __init__(self, X_data, y_data):
        self.X_data = torch.from_numpy(X_data)
        self.y_data = torch.from_numpy(y_data)
        self.n_samples = self.X_data.shape[0] 


In [53]:
X_before = train_df[:, :3]
X_after = train_df[:, 4:]
X = np.hstack([X_before, X_after])
y = train_df[:, 3]
def split_set(X_all, y_all, split):
    split_index = int(train_df.shape[0] * split)
    X_val = X_all[:split_index]
    y_val = y_all[:split_index]
    X = X_all[split_index:]
    y = y_all[split_index:]
    return X_val, y_val, X, y

X_val, y_val, X, y = split_set(X, y, .15)

In [54]:
val = TitanicDataset(X_val, y_val)
train = TitanicDataset(X, y)

# Building the Neural Network
1. Build an N-Layer Multi-Layer Perceptron Model using PyTorch, with RELU activation in the hidden layers, Gradient Descent optimization, and a Cross-Entropy Loss. Name your neural network class `MLP_Network`.
    * References:
	    1.  [A Quick Introduction to PyTorch (with a Hands-On Mini Project)](https://medium.com/ai-ml-interview-playbook/a-quick-introduction-to-pytorch-with-a-hands-on-mini-project-b58bd9220813)
        2. [Building Multilayer Perceptron Models in PyTorch - MachineLearningMastery.com](https://machinelearningmastery.com/building-multilayer-perceptron-models-in-pytorch/)
        3. [Building a PyTorch binary classification multi-layer perceptron from the ground up – Hutsons-hacks](https://hutsons-hacks.info/building-a-pytorch-binary-classification-multi-layer-perceptron-from-the-ground-up)
        4. [Introduction to PyTorch — PyTorch Tutorials 2.9.0+cu128 documentation](https://docs.pytorch.org/tutorials/beginner/introyt/introyt1_tutorial.html)
        5. Dr. Santos' collab code: [Google Colab](https://colab.research.google.com/drive/1FpzMOkUGtPQDljfS32uJLD_zz4wmQ6GF#scrollTo=843d9673)
2. Recall that the sizes of the input and output layers—that is, the number of neurons in each layer—are determined by the input data and the number of classes in the application, respectively. However, you will need to select the sizes for the hidden layers. Experiment with different hidden layer sizes, making each subsequent layer half the size of the previous one.(i.e., $m^{[l]} = 2*m^{[l+1]}$).
3. Hyperparameter search: The `MLP_Network`class should support different network depths and hidden layer sizes. You can perform the hyperparameter search manually or with the help of a tool like Optuna. You will train at least 36 different MLP models for the following hyperparameters:
    1. Network depth 2x
    2. Hidden layers sizes 2x
    3. Learning rates 3x
    4. Regularization 3x
    5. Number of iterations: Based on your loss curves make a choice for the number of Gradient Descent iterations controlled at the epoch level with the variable `num_epochs`.

In [57]:
import torch.nn as nn
class MLP_Network(nn.Module):
    def __init__(self, hidden_sizes: List[int], num_classes: int, input_size: int):
        super(MLP, self).__init__()

        self.layers = nn.ModuleList()
        last_size = input_size

        for hidden_size in hidden_sizes:
            self.layers.append(nn.Linear(last_size, hidden_size))
            self.layers.append(nn.ReLu())
            last_size = hidden_size
        
        self.input_size = input_size
        self.output = nn.Linear(last_size, num_classes)
        self.depth = len(hidden_sizes)

    def forward(self, x):

        if x.dim() > 2:
            x = x.reshape(-1, self.input_size)
        
        out = x
        for layer in self.layers:
            out = layer(out)

        out = self.output(out)

        return out

# Hyperparameters

In [64]:
input_size = X.shape[0]
hidden_sizes = []
network_depth = len(hidden_sizes)
learning_rates = []
regularization = 'l1'

In [65]:
def calculate_accuracy(model, data_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in data_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        return 100 * correct / total

In [67]:
def train_model(model, train_loader, criterion, optimizer, num_epochs):
    model.train()
    output_loss = 0.0

    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(train_loader):
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterions(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        output_loss = running_loss / len(train_loader)

    final_accuracy = calculate_accuracy(model, train_loader)
    return outoput_loss, final_accuracy

In [68]:
def objective(trial: optuna.Trial, train_loader, val_loader, input_size, num_classes):
    depth = trial.suggest_categorical('depth', [2, 4])

    base_sizes = [128, 64, 32, 16]
    hidden_layer_sizes = base_sizes[:depth]

    lr = trial.suggest_categorical('lr', [0.1, 0.01, 0.001])
    wd = trial.suggest_categorical('weight_decay', [0.0, 0.001, 0.01])

    model = MLP_Network(
        input_size=input_size,
        hidden_layer_sizes=hidden_layer_sizes,
        num_classes=num_classes
    )

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=wd)

    train_model(model, train_loader, criterion, optimizer)

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

    final_val_losss = val_loss / len(val_loader)
    
    return final_val_loss

NameError: name 'optuna' is not defined

In [None]:
def optuna_search():
    train_loader, val_loader, INPUT_SIZE, NUM_CLASSES = data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True),
                                                        data.DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False),
                                                        input_size,
                                                        2
    