<a href="https://colab.research.google.com/github/ubaidillah-chem/fouling-ml/blob/main/06_Importance_from_weights.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import torch
import torch.nn as nn

from google.colab import drive
# Mount Google Drive. The mount point is absolute, and the dataset path below is
# built from it, so the read no longer depends on the notebook's current working
# directory happening to be /content.
DRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

# Load the dataset and discard the rows labelled 389..431 (range end is exclusive).
# NOTE(review): the reason these rows are excluded is not recorded in this notebook.
dataset = pd.read_csv(
    f'{DRIVE_MOUNT_POINT}/MyDrive/dataset_filtered_by_top_pca_loadings.csv'
).drop(index=range(389, 432))

# Feature matrix: every column except the target 'Rf', as float64.
X = dataset.drop(columns=['Rf']).values.astype('float64')
# Target 'Rf' as a float64 column vector of shape (n_samples, 1).
y = dataset['Rf'].values.astype('float64').reshape(-1, 1)


In [None]:
class MLPModel(nn.Module):
    """Single-hidden-layer perceptron producing one strictly positive output (predicted Rf).

    The layers live in ``self.model`` (an ``nn.Sequential``) in this order:
    ``Linear(input_dim, 32) -> ReLU -> Linear(32, 1) -> Softplus``.
    """

    def __init__(self, input_dim):
        """Build the layer stack.

        Args:
            input_dim: Number of input features per sample.
        """
        super().__init__()
        hidden_layer = nn.Linear(input_dim, 32)
        output_layer = nn.Linear(32, 1)  # single output unit: the predicted Rf
        self.model = nn.Sequential(
            hidden_layer,
            nn.ReLU(),
            output_layer,
            nn.Softplus(),  # maps the raw output to a strictly positive value
        )

    def forward(self, x):
        """Pass ``x`` through the layer stack and return its output."""
        network = self.model
        return network(x)

# Rebuild the architecture with one input per feature column of X, then restore
# the trained parameters from the checkpoint on the mounted Drive.
model_final = MLPModel(input_dim=X.shape[1])
# map_location='cpu' lets a checkpoint that was saved from a GPU session be
# deserialized on a CPU-only runtime instead of raising at load time.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
model_final.load_state_dict(
    torch.load('/content/gdrive/MyDrive/model_final.pth', map_location='cpu')
)
# Put the module in evaluation mode before it is inspected.
model_final.eval()


In [None]:
import numpy as np

# Extract the weight matrices of the two Linear layers (indices 0 and 2 of the
# Sequential). .detach().cpu() is used instead of the legacy .data attribute so
# the conversion to NumPy is explicit about leaving autograd and works wherever
# the parameters currently live.
input_to_hidden_weights = model_final.model[0].weight.detach().cpu().numpy().T  # Transpose to get features x hidden_units
hidden_to_output_weights = model_final.model[2].weight.detach().cpu().numpy().T  # Transpose to get hidden_units x output_units

# Effective weight of each feature: product of the two weight matrices, giving
# an array of shape (features, output_units).
# Note: this product uses only the Linear weights; the activation between the
# two layers and the bias terms are not part of the calculation.
effective_weights = np.dot(input_to_hidden_weights, hidden_to_output_weights)

# Feature names, in the same column order as the dataset without the target 'Rf'
feature_names = dataset.drop(columns=['Rf']).columns.tolist()

# One row per feature with its effective weight
effective_weights_df = pd.DataFrame({
    'Feature': feature_names,
    'Effective Weight': effective_weights.flatten()  # Flatten to a 1D array
})

# Magnitude and sign (-1, 0 or 1) of each effective weight
effective_weights_df['Absolute Effective Weight'] = effective_weights_df['Effective Weight'].abs()
effective_weights_df['Sign'] = np.sign(effective_weights_df['Effective Weight'])

# Persist the table to the mounted Drive (absolute path, matching the mount point)
effective_weights_df.to_csv('/content/gdrive/MyDrive/effective_weights.csv', index=False)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Number of features shown in the chart
TOP_N_FEATURES = 10

# Sort by absolute effective weight in descending order and keep the top TOP_N_FEATURES
effective_weights_sorted = (
    effective_weights_df
    .sort_values(by='Absolute Effective Weight', ascending=False)
    .head(TOP_N_FEATURES)
)

# Horizontal bar plot of the signed effective weights; dpi is five times the
# configured default to get a high-resolution figure.
fig, ax = plt.subplots(figsize=(6, 3), dpi=plt.rcParams['figure.dpi']*5)
sns.barplot(x='Effective Weight', y='Feature', data=effective_weights_sorted, color='#082c5c', legend=False, ax=ax)
ax.set_xlabel('Effective Weight')
ax.set_ylabel('')  # feature names on the ticks are self-explanatory

# Print each bar's value (two decimals) in white at the centre of the bar
for container in ax.containers:
    ax.bar_label(container, fmt='%.2f', label_type='center', color='white')

plt.tight_layout()
plt.show()