In [1]:
import pandas as pd
import os
import numpy as np
import h5py

#### Global variables

In [2]:
# Notebook-wide settings shared by the training and plotting cells.
SEED = 0               # value handed to torch.manual_seed before training
header_font_size = 20  # base size for subplot/figure titles
font_size = 16         # base size for axis labels, legends and annotations

# Data Preprocessing

In [3]:
# Locations of the raw inputs: the .mat file holding the feature table and the
# directory containing the fold index / label arrays saved as .npy files.
data_path = "original_data/"
mat_file = "original_data/ReadBrownDwarf.mat"

# Row ids (idTE / idTR) and labels (labelTE / labelTR) for the test and
# training part of each fold.
idTE, idTR, labelTE, labelTR = (
    np.load(data_path + npy_name)
    for npy_name in ("idTE.npy", "idTR.npy", "labelTE.npy", "labelTR.npy")
)

In [4]:
# Read the "data" dataset from the .mat file (opened through h5py) and
# transpose it into the feature table used by the rest of the notebook.
with h5py.File(mat_file, 'r') as mat_handle:
    data = pd.DataFrame(mat_handle["data"]).T

data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
0,326.808170,2.686124,5.5560,0.0000,5.7600,0.0000,5.9470,0.0000,6.0720,0.0000,...,0.162,0.050,0.015,0.044,5.562,5.539,5.479,0.017,0.026,0.020
1,321.200440,25.880459,0.0000,0.0000,7.6116,0.0010,0.0000,0.0000,7.5167,0.0010,...,0.059,0.020,0.016,0.059,6.896,6.921,6.932,0.026,0.024,0.016
2,144.038450,-12.459262,10.5120,0.0000,10.6920,0.0000,10.8660,0.0000,10.9890,0.0000,...,0.022,0.020,0.074,0.000,10.408,10.400,10.378,0.026,0.021,0.023
3,209.662170,21.696203,0.0000,0.0000,0.0000,0.0000,6.8603,0.1042,0.0000,0.0000,...,0.149,0.046,0.015,0.037,5.671,5.717,5.704,0.023,0.063,0.020
4,65.162550,-20.639620,0.0000,0.0000,0.0000,0.0000,6.3206,0.0000,5.9504,0.0031,...,0.169,0.072,0.015,0.036,5.430,5.402,5.333,0.054,0.036,0.017
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5664,13.245956,49.443574,9.1227,0.2347,9.1823,0.0010,0.0000,0.0000,9.0066,0.0010,...,0.022,0.019,0.021,0.144,8.358,8.389,8.398,0.027,0.017,0.016
5665,29.647854,48.432832,0.0000,0.0000,9.5538,0.0010,0.0000,0.0000,10.7253,0.0010,...,0.023,0.019,0.026,0.297,8.778,8.804,8.766,0.021,0.016,0.020
5666,346.990957,54.326940,0.0000,0.0000,9.8405,0.0010,9.2025,0.0010,10.1089,0.0374,...,0.024,0.020,0.030,0.327,9.180,9.187,9.180,0.021,0.020,0.022
5667,331.173131,46.427253,10.4915,0.0630,10.2538,0.0239,10.0057,0.0074,10.4418,0.0315,...,0.023,0.020,0.033,0.000,9.693,9.678,9.622,0.024,0.029,0.023


In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

Imputing 0s with Linear Regression

In [6]:
from sklearn.linear_model import LinearRegression
# Every column that contains at least one exact 0 is treated as having
# missing entries that need to be imputed.
columns_with_zero = data.columns[(data == 0).any()].tolist()

print(columns_with_zero)
target_columns = columns_with_zero

# Impute one column at a time: fit a linear regression on the rows where the
# column is non-zero (all other columns act as predictors) and overwrite the
# zero entries with the model's predictions. Columns are processed in order,
# so later fits see the values already imputed for earlier columns.
for target_column in target_columns:
    is_zero = data[target_column] == 0
    rows_known = data[~is_zero]
    rows_missing = data[is_zero]

    regressor = LinearRegression()
    regressor.fit(rows_known.drop(columns=target_column), rows_known[target_column])

    # Replace the zero values in the original dataframe
    data.loc[is_zero, target_column] = regressor.predict(
        rows_missing.drop(columns=target_column)
    )


[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]


Splitting the data into folds and each fold into training and test sets, then standardizing each fold with a scaler fitted on its training set only

In [7]:
X_train_list, X_test_list = [], []
y_train_list, y_test_list = [], []

for fold in range(len(idTR)):
    # idTR / idTE store 1-indexed row numbers, hence the -1 before .iloc
    fold_train = data.iloc[idTR[fold] - 1]
    fold_test = data.iloc[idTE[fold] - 1]

    # Scaling: statistics come from the training rows only and are then
    # applied unchanged to the test rows of the same fold.
    scaler = StandardScaler()
    scaler.fit(fold_train)
    X_train_list.append(scaler.transform(fold_train))
    X_test_list.append(scaler.transform(fold_test))

    y_train_list.append(labelTR[fold])
    y_test_list.append(labelTE[fold])

# Stack the per-fold results into arrays with the fold as leading axis
X_train_arr, X_test_arr = np.array(X_train_list), np.array(X_test_list)
y_train_arr, y_test_arr = np.array(y_train_list), np.array(y_test_list)

In [8]:
# Sanity check: shapes of the stacked per-fold features and labels
tuple(arr.shape for arr in (X_train_arr, X_test_arr, y_train_arr, y_test_arr))

((5, 4535, 26), (5, 1134, 26), (5, 4535), (5, 1134))

# NN

Network structure: $26 \times 10 \times 5 \times 1$

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

# Turn the stacked training arrays into PyTorch tensors:
# labels as torch.long, features as torch.float32.
y_train_tens = torch.tensor(y_train_arr, dtype=torch.long)
X_train_tens = torch.tensor(X_train_arr, dtype=torch.float32)

class NeuralNet(nn.Module):
    """Small fully connected network for binary classification.

    Layout: ``input_size -> 10 -> 5 -> 1``. The supplied ``activation`` is
    applied after each of the two hidden layers; the single output neuron is
    passed through a sigmoid.

    Args:
        input_size: number of input features per sample.
        activation: callable applied to the hidden-layer outputs
            (e.g. ``nn.ReLU()``).
    """

    def __init__(self, input_size, activation):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 10)
        self.activation = activation
        self.fc2 = nn.Linear(10, 5)
        # Output layer with a single neuron (binary classification)
        self.fc3 = nn.Linear(5, 1)

    def forward(self, x):
        """Return the sigmoid output of the network for the batch ``x``."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = self.activation(layer(hidden))
        # Sigmoid squashes the final output for binary classification
        return torch.sigmoid(self.fc3(hidden))


ModuleNotFoundError: No module named 'torch.nn'

## Training loop

In [None]:
# Train one NeuralNet per (activation function, fold) pair, record the mean
# batch loss of every epoch and save each trained model's state_dict to disk.
torch.manual_seed(SEED)  # Seed once so the whole activation sweep is reproducible
activation_functions = [nn.ReLU(), nn.Sigmoid(), nn.Tanh(), nn.Hardtanh()]
act_colors = {"ReLU": "red", "Sigmoid": "blue", "Tanh": "green", "Hardtanh": "orange"}

# Settings shared by every training run
batch_size = 64
num_epochs = 15
criterion = nn.BCELoss()  # Binary Cross-Entropy Loss on the sigmoid outputs

act_losses = []  # act_losses[activation][fold][epoch] -> mean batch loss
for act in activation_functions:
    # One output directory per activation function
    model_dir = f'./models/activation_test/{type(act).__name__}/'
    os.makedirs(model_dir, exist_ok=True)

    losses = []
    for i in range(len(X_train_tens)):
        # Fresh model and optimizer for every fold
        input_size = X_train_tens[i].shape[1]
        model = NeuralNet(input_size, act)
        optimizer = optim.Adam(model.parameters())

        # Prepare data for training using DataLoader
        train_dataset = TensorDataset(X_train_tens[i], y_train_tens[i])
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        # Training the model
        temp_loss = []
        for epoch in range(num_epochs):
            running_loss = 0.0
            for inputs, labels in train_loader:
                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward pass; squeeze (batch, 1) -> (batch,) to match the labels
                outputs = model(inputs).squeeze(dim=1)
                loss = criterion(outputs, labels.float())  # BCELoss needs float targets

                # Backward pass and optimize
                loss.backward()
                optimizer.step()

                running_loss += loss.item()

            epoch_loss = running_loss / len(train_loader)
            print(f"Epoch {epoch+1}, Loss: {epoch_loss}")
            temp_loss.append(epoch_loss)
        losses.append(temp_loss)

        # Saving the model for later use
        model_path = os.path.join(model_dir, f'fold{i}_binary_classification_model.pth')
        torch.save(model.state_dict(), model_path)
    act_losses.append(losses)

Loss on **training** data plotted over epochs

In [None]:
import matplotlib.pyplot as plt

# act_losses is indexed [activation][fold][epoch]; averaging over axis 1
# leaves one mean loss curve per activation function.
act_mean_losses = np.mean(act_losses, axis=1)

# One line per activation function, coloured according to act_colors
for act, loss in zip(activation_functions, act_mean_losses):
    name_act = type(act).__name__
    plt.plot(loss, "-o", label=name_act, color=act_colors[name_act])

plt.title("Training Loss vs. Epoch for Different Activation Functions", size=font_size)
plt.xlabel("Epoch", size=font_size)
plt.ylabel("Loss", size=font_size)
plt.legend(fontsize=font_size - 2)
plt.show()

In [None]:
from sklearn.metrics import f1_score
from sklearn.metrics import matthews_corrcoef, accuracy_score, precision_score, recall_score, confusion_matrix

# Reload every saved model and score it on the training data of its own fold.
# The resulting MCC is meant to be compared with the test-set MCC to spot
# overfitting.
for act in activation_functions:
    act_name = type(act).__name__
    print(f"Activation function: {act_name}")
    for i in range(len(X_train_tens)):
        print(f"Evaluating model on fold: {i}")
        X_train = X_train_tens[i].float()
        y_train = y_train_tens[i].float()

        # Load the trained model. The input width is taken from the data so
        # this cell does not rely on a variable left over from the training cell.
        input_size = X_train.shape[1]
        model = NeuralNet(input_size, act)
        model.load_state_dict(torch.load(f'models/activation_test/{act_name}/fold{i}_binary_classification_model.pth'))

        # Prepare dataset and dataloader
        batch_size = 64
        train_dataset = TensorDataset(X_train, y_train)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)  # No need to shuffle for evaluation

        model.eval()  # Switch to evaluation mode
        y_true = []
        y_pred = []
        with torch.no_grad():  # No gradients needed for evaluation
            for inputs, labels in train_loader:
                # Flatten (batch, 1) -> (batch,) so predictions line up with the labels
                outputs = model(inputs).squeeze(dim=1)
                predicted = (outputs > 0.5).float()  # Convert probabilities to binary predictions (0 or 1)
                y_true.extend(labels.cpu().numpy())
                y_pred.extend(predicted.cpu().numpy())

        # Calculate Matthews Correlation Coefficient (MCC)
        mcc_tr = matthews_corrcoef(y_true, y_pred)

        print(f"Training: MCC-score: {mcc_tr}, check against the test set for overfitting")


### Testing the model on the test set

In [None]:
# Score every saved model on the test data of its fold and collect, per
# activation function:
#   act_all_metrics[act][fold]             -> dict of the five metrics
#   act_all_confusion_matrices[act][fold]  -> confusion matrix (label order [1, 0])
#   act_mean_metrics[act]                  -> metrics averaged over the folds
#   act_mean_confusion_matrices[act]       -> element-wise mean confusion matrix
act_all_metrics = {}
act_mean_metrics = {}
act_all_confusion_matrices = {}
act_mean_confusion_matrices = {}

metric_names = ['F1 Score', 'MCC', 'Accuracy', 'Precision', 'Recall']
n_folds = len(X_test_arr)

for act in activation_functions:
    all_metrics = {}
    all_confusion_matrices = {}
    act_name = type(act).__name__
    print(f"Activation function: {act_name}")
    print("---------------------------------")
    for i in range(n_folds):
        print(f"    Evaluating model on fold: {i}")
        X_test = torch.tensor(X_test_arr[i], dtype=torch.float32)
        y_test = torch.tensor(y_test_arr[i], dtype=torch.float32)

        # Loading the previously trained model. The input width is taken from
        # the data so this cell does not rely on a variable left over from the
        # training cell.
        input_size = X_test.shape[1]
        model = NeuralNet(input_size, act)
        model.load_state_dict(torch.load(f'models/activation_test/{act_name}/fold{i}_binary_classification_model.pth'))

        # Prepare test dataset and dataloader
        batch_size = 64
        test_dataset = TensorDataset(X_test, y_test)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)  # No need to shuffle for testing

        model.eval()  # Switch to evaluation mode
        y_true = []
        y_pred = []
        with torch.no_grad():  # No need to calculate gradients in test run
            for inputs, labels in test_loader:
                # Flatten (batch, 1) -> (batch,) so predictions line up with the labels
                outputs = model(inputs).squeeze(dim=1)
                predicted = (outputs > 0.5).float()  # Convert probabilities to binary predictions (0 or 1)
                y_true.extend(labels.cpu().numpy())
                y_pred.extend(predicted.cpu().numpy())

        # Calculating different scores
        f1 = f1_score(y_true, y_pred)
        mcc = matthews_corrcoef(y_true, y_pred)  # Matthews Correlation Coefficient (MCC)
        accuracy = accuracy_score(y_true, y_pred)
        precision = precision_score(y_true, y_pred, average='binary')  # Binary average, since we have a binary classification problem
        recall = recall_score(y_true, y_pred, average='binary')
        # Rows/columns are ordered [1, 0], i.e. the positive class comes first
        conf_matrix = confusion_matrix(y_true, y_pred, labels=[1,0])

        # Stores the metrics in a dictionary for easy plotting
        metrics = {'F1 Score': f1, 'MCC': mcc, 'Accuracy': accuracy, 'Precision': precision, 'Recall': recall}

        all_metrics[f"fold_{i}"] = metrics
        all_confusion_matrices[f"fold_{i}"] = conf_matrix
        print(f"Testing MCC score: {metrics['MCC']}")
    print("---------------------------------")
    print("\n")
    # Saving the metrics and confusion matrices with respective act. func. for later use
    act_all_metrics[act_name] = all_metrics
    act_all_confusion_matrices[act_name] = all_confusion_matrices
    act_mean_metrics[act_name] = {
        name: sum(fold_metrics[name] for fold_metrics in all_metrics.values()) / n_folds
        for name in metric_names
    }
    # Element-wise average of the per-fold confusion matrices (float entries)
    act_mean_confusion_matrices[act_name] = np.mean(list(all_confusion_matrices.values()), axis=0)


## Metrics comparison

In [None]:
# Reshape the per-activation mean metrics into a Series indexed by
# (metric, activation function) for a grouped bar chart.
data_metric = pd.DataFrame(act_mean_metrics)
data_metric = data_metric.reset_index().rename(columns={'index': 'Metrics'})

data_metric = pd.melt(data_metric, id_vars='Metrics', var_name='Activation Function', value_name='Value')

data_metric = data_metric.set_index(['Metrics', 'Activation Function']).Value

# One bar group per metric, one bar per activation function. Colours are
# looked up in act_colors by column name so they stay consistent with the
# training-loss plot regardless of the column order produced by unstack().
metric_table = data_metric.unstack()
colors = [act_colors[name] for name in metric_table.columns]
metric_table.plot(kind='bar', stacked=False, color = colors, fontsize = font_size-3)
plt.legend(fontsize = font_size-2, loc = 'lower center')
plt.ylim(0.8,1)  # zoom in; bars below 0.8 are clipped
plt.title("Average Metrics for Different Activation Functions", size = header_font_size-5)
plt.show()

In [None]:
# Average the per-fold confusion matrices of every activation function.
# The result is rounded to the nearest integer (instead of being truncated by
# an integer-typed mean) so the averaged counts stay as close as possible to
# the true means while remaining integers for plotting.
mean_confusion_matrices = {}
for act_func, matrices in act_all_confusion_matrices.items():
    stacked_matrices = np.dstack(list(matrices.values()))  # shape: (rows, cols, n_folds)
    mean_matrix = np.rint(np.mean(stacked_matrices, axis=2)).astype(int)
    mean_confusion_matrices[act_func] = mean_matrix

# print(mean_confusion_matrices)

## Confusion matrix comparison

In [None]:
import seaborn as sns

# Draw the fold-averaged confusion matrix of every activation function side by side.
fig, axes = plt.subplots(1, len(activation_functions), figsize=(20, 5))

# The confusion matrices were computed with labels=[1, 0], so row/column 0 is
# class 1 and row/column 1 is class 0. The tick labels must say so explicitly;
# the heatmap default would label them 0, 1.
class_labels = [1, 0]

for i, act in enumerate(activation_functions):
    act_name = type(act).__name__
    conf_matrix = mean_confusion_matrices[act_name]

    # Creates a heatmap for the confusion matrix (fmt='d' expects integer counts)
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=axes[i], cbar=False,
                annot_kws={"size": font_size},
                xticklabels=class_labels, yticklabels=class_labels)

    axes[i].set_title(f'{act_name}', size = header_font_size)
    axes[i].set_xlabel('Predicted', size = font_size)
    axes[i].set_ylabel('True', size = font_size)
    axes[i].tick_params(axis='both', which='major', labelsize=font_size)

fig.suptitle("Average Confusion Matrices for Different Activation Functions", fontsize=header_font_size + 5)
plt.tight_layout()
plt.show()