In [None]:
import pandas as pd
import os
import numpy as np
import h5py

The training loop

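Using the actual dataset from the provided MATLAB file (`original_dataset/ReadBrownDwarf.mat`), together with the pre-computed fold indices and labels stored next to it as `.npy` files.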

In [None]:
# Locations of the raw inputs: the MATLAB feature file and the directory that
# also holds the pre-computed fold index / label arrays as .npy files.
data_path = "original_dataset/"
mat_file = "original_dataset/ReadBrownDwarf.mat"

# idTR / idTE are used further down as per-fold row ids into the feature
# table; labelTR / labelTE are the matching per-fold targets.
idTE, idTR, labelTE, labelTR = (
    np.load(f"{data_path}{stem}.npy")
    for stem in ("idTE", "idTR", "labelTE", "labelTR")
)

In [None]:
# Read the "data" entry of the MATLAB file into a DataFrame and transpose it
# while the file handle is still open.
with h5py.File(mat_file, 'r') as mat_handle:
    data = pd.DataFrame(mat_handle["data"]).T

# Replace exact zeros with the mean of the column they sit in.
# NOTE(review): the means are taken with the zeros still included and over
# every row (training and test rows alike) - confirm that is intended.
column_means = data.mean()
data = data.replace(0, column_means)
data

In [None]:
# Collect the feature rows and labels of every fold.
# idTR / idTE are 1-indexed, hence the "- 1" before positional lookup.
fold_range = range(len(idTR))
X_train_list = [data.iloc[idTR[fold] - 1] for fold in fold_range]
X_test_list = [data.iloc[idTE[fold] - 1] for fold in fold_range]
y_train_list = [labelTR[fold] for fold in fold_range]
y_test_list = [labelTE[fold] for fold in fold_range]

# Stack the per-fold pieces into arrays with the fold as the leading axis.
X_train_arr, X_test_arr = np.array(X_train_list), np.array(X_test_list)
y_train_arr, y_test_arr = np.array(y_train_list), np.array(y_test_list)

In [None]:
from sklearn.decomposition import PCA

# Project every sample onto its first N_PCA_COMPONENTS principal components.
# All sizes are derived from the arrays instead of being hard-coded, so the
# cell keeps working if the number of folds, rows or features changes.
N_PCA_COMPONENTS = 13
PCA_SEED = 1234  # only consulted when scikit-learn picks a randomized/arpack solver

n_features = X_train_arr.shape[-1]
if n_features <= N_PCA_COMPONENTS:
    # This cell overwrites X_train_arr / X_test_arr, so running it twice would
    # apply PCA to data that has already been reduced.
    raise ValueError(
        f"Expected more than {N_PCA_COMPONENTS} features but found {n_features}; "
        "PCA seems to have been applied already - re-run the cell that builds "
        "X_train_arr / X_test_arr first."
    )

# Leading (fold, sample) dimensions, restored after the projection.
train_leading_shape = X_train_arr.shape[:-1]
test_leading_shape = X_test_arr.shape[:-1]

pca = PCA(n_components=N_PCA_COMPONENTS, random_state=PCA_SEED)

# Mixing all folds together before applying PCA.
# NOTE(review): if the folds are cross-validation splits, rows that belong to
# one fold's test set are part of other folds' training sets and therefore
# influence this fit - confirm this is acceptable.
X_train_arr_reshaped = X_train_arr.reshape(-1, n_features)
X_test_arr_reshaped = X_test_arr.reshape(-1, n_features)
print(X_train_arr_reshaped.shape)

pca.fit(X_train_arr_reshaped)

# Transform and reshape back to (fold, sample, component).
X_train_arr = pca.transform(X_train_arr_reshaped).reshape(*train_leading_shape, N_PCA_COMPONENTS)
X_test_arr = pca.transform(X_test_arr_reshaped).reshape(*test_leading_shape, N_PCA_COMPONENTS)

In [None]:
# Sanity check: shapes of the per-fold feature and label arrays.
tuple(arr.shape for arr in (X_train_arr, X_test_arr, y_train_arr, y_test_arr))

Neural net for binary classification with layer sizes input → 32 → 64 → 1, where the input size is the number of PCA components (13).

# NN

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

# Wrap the per-fold training arrays as tensors: int64 labels (cast to float
# right before the loss is computed) and float32 features for the network.
y_train_tens = torch.tensor(y_train_arr, dtype=torch.long)
X_train_tens = torch.tensor(X_train_arr, dtype=torch.float32)

# Defining a simple neural network class for binary classification
class NeuralNet(nn.Module):
    """Fully connected binary classifier: input_size -> 32 -> 64 -> 1.

    Args:
        input_size: Number of features per sample.
        activation: Callable (e.g. ``nn.ReLU()``) applied after each of the
            two hidden layers.

    The forward pass returns a tensor of shape ``(batch, 1)`` holding the
    sigmoid of the last layer, i.e. a value in (0, 1) per sample.
    """

    def __init__(self, input_size, activation):
        super().__init__()
        # Layers are created in the order fc1, fc2, fc3; the attribute names
        # are also the keys of the saved state dict.
        self.fc1 = nn.Linear(in_features=input_size, out_features=32)
        self.activation = activation
        self.fc2 = nn.Linear(in_features=32, out_features=64)
        self.fc3 = nn.Linear(in_features=64, out_features=1)  # single output neuron

    def forward(self, x):
        hidden = self.activation(self.fc1(x))
        hidden = self.activation(self.fc2(hidden))
        logits = self.fc3(hidden)
        # Squash to a probability for binary classification.
        return logits.sigmoid()


## Training loop

In [None]:




# Train one network per activation function, fold by fold, and record the mean
# batch loss of every epoch: act_losses[activation][fold][epoch].
SEED = 1234
batch_size = 32
num_epochs = 15

act_losses = []
activation_functions = [nn.ReLU(), nn.Sigmoid(), nn.Tanh(), nn.Hardtanh()]
act_colors = {"ReLU": "red", "Sigmoid": "blue", "Tanh": "green", "Hardtanh": "orange"}

criterion = nn.BCELoss()  # Binary cross-entropy on the sigmoid outputs

for act in activation_functions:
    # Re-seed for every activation function so that weight initialisation and
    # batch shuffling start from the same random state in each run.
    torch.manual_seed(SEED)

    # One checkpoint directory per activation function.
    model_dir = f'./models/activation_test/{type(act).__name__}/'
    os.makedirs(model_dir, exist_ok=True)

    losses = []
    for i in range(len(X_train_tens)):
        # Initialize the neural network for this fold.
        input_size = X_train_tens[i].shape[1]
        model = NeuralNet(input_size, act)
        if i > 0:
            # Warm start: continue from the weights saved after the previous fold.
            # NOTE(review): if the folds are cross-validation splits, the previous
            # fold's training rows may overlap this fold's test rows, which would
            # make the test metrics optimistic - confirm this is intended.
            previous_path = os.path.join(model_dir, f'fold{i-1}_binary_classification_model.pth')
            model.load_state_dict(torch.load(previous_path))

        # A fresh optimizer per fold (Adam's running statistics are not carried over).
        optimizer = optim.Adam(model.parameters())

        # Prepare data for training using DataLoader.
        train_dataset = TensorDataset(X_train_tens[i], y_train_tens[i])
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        temp_loss = []  # mean batch loss of each epoch for this fold
        for epoch in range(num_epochs):
            running_loss = 0.0
            for inputs, labels in train_loader:
                optimizer.zero_grad()

                # Forward pass; drop the trailing dimension so that the outputs
                # line up with the 1-D label vector.
                outputs = model(inputs).squeeze(dim=1)
                loss = criterion(outputs, labels.float())

                # Backward pass and optimize.
                loss.backward()
                optimizer.step()

                running_loss += loss.item()

            epoch_loss = running_loss / len(train_loader)
            print(f"Epoch {epoch+1}, Loss: {epoch_loss}")
            temp_loss.append(epoch_loss)
        losses.append(temp_loss)

        # Save the model; the next fold and the evaluation cells load it again.
        model_path = os.path.join(model_dir, f'fold{i}_binary_classification_model.pth')
        torch.save(model.state_dict(), model_path)
    act_losses.append(losses)

In [None]:
import matplotlib.pyplot as plt

# Average over axis 1 of act_losses (the folds), leaving one loss curve per
# activation function.
act_mean_losses = np.mean(act_losses, axis=1)

fig, ax = plt.subplots()
for act, epoch_means in zip(activation_functions, act_mean_losses):
    curve_label = type(act).__name__
    ax.plot(epoch_means, "-o", label=curve_label, color=act_colors[curve_label])
ax.legend()
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Training Loss vs. Epoch for Different Activation Functions")
plt.show()

### Testing last epoch on training data

In [None]:
from sklearn.metrics import f1_score
from sklearn.metrics import matthews_corrcoef, accuracy_score, precision_score, recall_score, confusion_matrix

# Score every saved model on the training split of its own fold. The resulting
# MCC is meant to be compared with the test-set MCC to spot overfitting.
for act in activation_functions:
    act_name = type(act).__name__
    print(f"Activation function: {act_name}")
    for i in range(len(X_train_tens)):
        print(f"Evaluating model on fold: {i}")

        X_train = X_train_tens[i].float()
        y_train = y_train_tens[i].float()

        # Rebuild the network with the feature count taken from the data itself,
        # so this cell does not depend on a variable left over by the training
        # cell, then load the trained weights.
        input_size = X_train.shape[1]
        model = NeuralNet(input_size, act)
        model.load_state_dict(torch.load(f'models/activation_test/{act_name}/fold{i}_binary_classification_model.pth'))

        # Prepare dataset and dataloader (no shuffling needed for evaluation).
        batch_size = 64
        train_dataset = TensorDataset(X_train, y_train)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

        model.eval()  # Switch to evaluation mode
        y_true = []
        y_pred = []
        with torch.no_grad():  # No gradients needed for evaluation
            for inputs, labels in train_loader:
                # The model returns shape (batch, 1); squeeze so predictions are
                # a flat vector like the labels.
                outputs = model(inputs).squeeze(dim=1)
                predicted = (outputs > 0.5).float()  # Probabilities -> 0/1 predictions
                y_true.extend(labels.cpu().tolist())
                y_pred.extend(predicted.cpu().tolist())

        # Calculate Matthews Correlation Coefficient (MCC)
        mcc_tr = matthews_corrcoef(y_true, y_pred)

        print(f"Training: MCC-score: {mcc_tr}, check against the test set for overfitting")


### Testing the model on the test set

In [None]:
from sklearn.metrics import f1_score
from sklearn.metrics import matthews_corrcoef, accuracy_score, precision_score, recall_score, confusion_matrix


# Score every saved model on the test split of its own fold and keep the
# results per activation function:
#   act_all_metrics[act_name]["fold_<i>"]            -> dict of scalar metrics
#   act_all_confusion_matrices[act_name]["fold_<i>"] -> 2x2 confusion matrix
act_all_metrics = {}
act_all_confusion_matrices = {}

for act in activation_functions:
    all_metrics = {}
    all_confusion_matrices = {}
    act_name = type(act).__name__
    print(f"Activation function: {act_name}")
    for i in range(len(X_train_tens)):
        print(f"Evaluating model on fold: {i}")

        X_test = torch.tensor(X_test_arr[i], dtype=torch.float32)
        y_test = torch.tensor(y_test_arr[i], dtype=torch.float32)

        # Rebuild the network with the feature count taken from the data itself,
        # so this cell does not depend on a variable left over by the training
        # cell, then load the trained weights.
        input_size = X_test.shape[1]
        model = NeuralNet(input_size, act)
        model.load_state_dict(torch.load(f'models/activation_test/{act_name}/fold{i}_binary_classification_model.pth'))

        # Prepare test dataset and dataloader (no shuffling needed for testing).
        batch_size = 64
        test_dataset = TensorDataset(X_test, y_test)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

        model.eval()  # Switch to evaluation mode
        y_true = []
        y_pred = []
        with torch.no_grad():  # No gradients needed in a test run
            for inputs, labels in test_loader:
                # The model returns shape (batch, 1); squeeze so predictions are
                # a flat vector like the labels.
                outputs = model(inputs).squeeze(dim=1)
                predicted = (outputs > 0.5).float()  # Probabilities -> 0/1 predictions
                y_true.extend(labels.cpu().tolist())
                y_pred.extend(predicted.cpu().tolist())

        # F1 score and Matthews Correlation Coefficient (MCC)
        f1 = f1_score(y_true, y_pred)
        mcc = matthews_corrcoef(y_true, y_pred)

        # Accuracy, precision, recall and confusion matrix. 'binary' averaging
        # because this is a two-class problem.
        accuracy = accuracy_score(y_true, y_pred)
        precision = precision_score(y_true, y_pred, average='binary')
        recall = recall_score(y_true, y_pred, average='binary')
        # labels=[1,0] puts class 1 in the first row/column of the matrix.
        conf_matrix = confusion_matrix(y_true, y_pred, labels=[1,0])

        # Store the metrics in a dictionary for easy plotting.
        metrics = {'F1 Score': f1, 'MCC': mcc, 'Accuracy': accuracy, 'Precision': precision, 'Recall': recall}

        all_metrics[f"fold_{i}"] = metrics
        all_confusion_matrices[f"fold_{i}"] = conf_matrix
        print(f"Testing: {metrics['MCC']}")

    # Save the metrics and confusion matrices under the activation function's name.
    act_all_metrics[act_name] = all_metrics
    act_all_confusion_matrices[act_name] = all_confusion_matrices


## Metrics comparison

Using the 4th fold (index 3) as the best result.

In [None]:
# Pick the results of one fold for every activation function. Activation
# functions without an entry for that fold are left out.
selected_fold = "fold_3"

metrics_3 = {}
for act_label, per_fold_metrics in act_all_metrics.items():
    if selected_fold in per_fold_metrics:
        metrics_3[act_label] = per_fold_metrics[selected_fold]

confusion_matrix_3 = {}
for act_label, per_fold_matrices in act_all_confusion_matrices.items():
    if selected_fold in per_fold_matrices:
        confusion_matrix_3[act_label] = per_fold_matrices[selected_fold]

In [None]:
# Reshape {activation: {metric: value}} into a long Series indexed by
# (metric, activation function).
data_metric = (
    pd.DataFrame(metrics_3)
    .reset_index()
    .rename(columns={'index': 'Metrics'})  # make 'Metrics' a regular column
    .melt(id_vars='Metrics', var_name='Activation Function', value_name='Value')
    .set_index(['Metrics', 'Activation Function'])
    .Value
)

# One group of bars per metric, one bar per activation function.
metric_table = data_metric.unstack()

# Look the bar colours up per column so every activation function keeps the
# colour it has in the training-loss plot, whatever order the columns are in.
colors = [act_colors[act_label] for act_label in metric_table.columns]

ax = metric_table.plot(kind='bar', stacked=False, color=colors)
ax.set_ylim(0.8, 1)  # zoom in on the upper range
ax.set_ylabel("Score")
ax.set_title("Metrics for Different Activation Functions")
plt.show()

## Confusion matrix comparison

In [None]:
import seaborn as sns

# Row/column order of the stored confusion matrices. This must match the
# labels=[1,0] argument used when the matrices were computed; without explicit
# tick labels the heatmap would number the classes 0, 1 and mislabel them.
CLASS_ORDER = [1, 0]

# squeeze=False keeps `axes` two-dimensional even with a single activation function.
fig, axes = plt.subplots(1, len(activation_functions), figsize=(20, 5), squeeze=False)

for ax, act in zip(axes[0], activation_functions):
    act_name = type(act).__name__
    conf_matrix = confusion_matrix_3[act_name]

    # Heatmap of the confusion matrix with the true class labels on the ticks.
    sns.heatmap(
        conf_matrix,
        annot=True,
        fmt='d',
        cmap='Blues',
        ax=ax,
        xticklabels=CLASS_ORDER,
        yticklabels=CLASS_ORDER,
    )

    ax.set_title(f'Confusion Matrix {act_name}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')

fig.suptitle("Confusion Matrices for Different Activation Functions", fontsize=25)
plt.tight_layout()
plt.show()