<a href="https://colab.research.google.com/github/ubaidillah-chem/fouling-ml/blob/main/05_MLP_model_final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

from google.colab import drive
# Mount Google Drive at /content/gdrive. The cells below open files through the
# relative path 'gdrive/MyDrive/...', so they only resolve when the working
# directory is /content (NOTE(review): presumably the Colab default — confirm).
drive.mount('/content/gdrive', force_remount=True)

In [None]:
# === 0. Load dataset ===
# Every column except the target 'Rf' is a feature; the target is kept as an
# (n_samples, 1) column vector. NaN and +/-inf feature entries are replaced by 0.
dataset = pd.read_csv('gdrive/MyDrive/dataset_filtered_by_top_pca_loadings.csv')

feature_frame = dataset.drop(columns=['Rf'])
X = np.nan_to_num(
    feature_frame.values.astype('float64'),
    nan=0.0,
    posinf=0.0,
    neginf=0.0,
)
y = dataset['Rf'].values.astype('float64').reshape(-1, 1)


In [None]:
# Sanity check: (n_samples, n_features) of the feature matrix built without 'Rf'.
X.shape

In [None]:
# === 1. Scale full dataset ===
# Standardise the features with statistics fitted on the whole dataset, then
# wrap the scaled features and the raw targets as float32 tensors.
scaler_full = StandardScaler().fit(X)
X_scaled_full = scaler_full.transform(X)

X_tensor_full, y_tensor_full = (
    torch.tensor(array, dtype=torch.float32) for array in (X_scaled_full, y)
)


In [None]:
class MLPModel(nn.Module):
    """Single-hidden-layer perceptron that regresses Rf from a feature vector.

    Architecture: Linear(input_dim -> 32) -> ReLU -> Linear(32 -> 1) -> Softplus.

    Args:
        input_dim: Number of input features per sample.
    """

    def __init__(self, input_dim):
        super().__init__()
        # The position of each layer fixes its state_dict key ('model.0.*',
        # 'model.2.*'), so the order below must not change if previously
        # saved checkpoints are to keep loading.
        layers = [
            nn.Linear(input_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 1),  # Output: predicted Rf
            nn.Softplus(),     # Constrain the prediction to strictly positive values
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run the network on ``x`` (batch, input_dim) and return (batch, 1) predictions."""
        return self.model(x)

In [None]:
# === 2. Train final model ===
# Hyperparameters gathered in one place so they are easy to find and tune.
SEED = 42
N_EPOCHS = 100
BATCH_SIZE = 64
LEARNING_RATE = 1e-3

# Seed PyTorch's global RNG *before* the model and the loader are created so
# that weight initialisation and mini-batch shuffling repeat on every run;
# without this the saved checkpoint differs each time the cell is executed.
torch.manual_seed(SEED)

model_final = MLPModel(input_dim=X.shape[1])
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model_final.parameters(), lr=LEARNING_RATE)

final_loader = DataLoader(
    TensorDataset(X_tensor_full, y_tensor_full),
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Training loop
train_losses = []  # sample-weighted mean MSE of each epoch, for inspection/plotting
model_final.train()
for epoch in range(N_EPOCHS):
    running_loss = 0.0
    for xb, yb in final_loader:
        pred = model_final(xb)
        loss = criterion(pred, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Weight by batch size: the last batch may be smaller than BATCH_SIZE.
        running_loss += loss.item() * xb.size(0)
    train_losses.append(running_loss / len(final_loader.dataset))
    if (epoch + 1) % 10 == 0:
        print(f"epoch {epoch + 1:3d}/{N_EPOCHS}  mean MSE = {train_losses[-1]:.6f}")

# Save the trained model
torch.save(model_final.state_dict(), 'gdrive/MyDrive/model_final.pth')


In [None]:
# Load the saved model
# map_location='cpu' keeps the checkpoint loadable whatever device it was saved
# from; weights_only=True asks torch.load to restrict unpickling to tensors and
# plain containers instead of arbitrary pickled objects.
model_final = MLPModel(input_dim=X.shape[1])
state_dict = torch.load(
    'gdrive/MyDrive/model_final.pth',
    map_location='cpu',
    weights_only=True,
)
model_final.load_state_dict(state_dict)


In [None]:
# === 3. Predict using final model ===
# Switch to evaluation mode and predict on the full (scaled) dataset without
# tracking gradients; residuals are actual minus predicted.
model_final.eval()

y_true_final = y.flatten()
with torch.no_grad():
    predictions = model_final(X_tensor_full)
    y_pred_final = predictions.numpy().flatten()

residuals_final = y_true_final - y_pred_final


In [None]:
# === 4. Plot Actual vs. Predicted ===
# Plot the predictions computed in step 3 directly. Reading them back from
# 'dataset_with_residuals.csv' was circular: that file is only written by the
# export cell at the end of the notebook, so a fresh run either failed (file
# missing) or plotted, and later re-exported, stale values from an older run.
# `df` is still defined because the residual cell stores a column in it.
df = pd.DataFrame({'Actual_Rf': y_true_final, 'Predicted_Rf': y_pred_final})

fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(y_true_final, y_pred_final, alpha=0.6, edgecolor='k')

# Reference diagonal spanning the range of the actual values.
rf_min, rf_max = np.min(y_true_final), np.max(y_true_final)
ax.plot([rf_min, rf_max], [rf_min, rf_max],
        color='red', linestyle='--', label='Perfect Prediction (y = x)')

ax.set_xlabel('Actual Rf')
ax.set_ylabel('Predicted Rf')
ax.set_title('Actual vs. Predicted Rf (Final Model)')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()


In [None]:
# === 5. Plot Residuals ===
# Residual = actual - predicted; it is also stored as a column of `df`.
residuals_final = y_true_final - y_pred_final
df['Residual'] = residuals_final

fig, ax = plt.subplots(figsize=(6, 4.5))
ax.scatter(y_true_final, residuals_final, alpha=0.6, edgecolor='k')
ax.axhline(y=0, color='red', linestyle='--', label='Zero Residual')
ax.set_xlabel('Actual Rf')
# The label previously contained mojibake in place of the minus sign. U+2212
# is written as an escape so a file re-encoding cannot corrupt it again.
ax.set_ylabel('Residual (Actual \u2212 Predicted)')
ax.set_title('Residuals vs. Actual Rf (Final Model)')
ax.grid(True)
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
import pandas as pd

# Convert everything to a DataFrame
# One row per sample: the (unscaled) feature values plus actual Rf, predicted
# Rf and the residual.
feature_columns = dataset.drop(columns=['Rf']).columns
df_full = pd.DataFrame(X, columns=feature_columns)
df_full['Actual_Rf'] = y_true_final
df_full['Predicted_Rf'] = y_pred_final
df_full['Residual'] = residuals_final

# Rows of the run-number file that are removed before run numbers are attached.
# NOTE(review): presumably these are the rows absent from the filtered dataset
# — confirm against the notebook that produced it.
EXCLUDED_RUN_ROWS = range(389, 432)
df_with_run = (
    pd.read_csv('gdrive/MyDrive/dataset_with_run_num.csv')
    .drop(index=EXCLUDED_RUN_ROWS)
    .reset_index(drop=True)
)

# Assigning a Series aligns on the index, so a row-count mismatch would silently
# produce NaN / dropped run numbers. Fail loudly instead of exporting bad data.
if len(df_with_run) != len(df_full):
    raise ValueError(
        f"Run-number table has {len(df_with_run)} rows after dropping "
        f"{len(EXCLUDED_RUN_ROWS)} rows, but the dataset has {len(df_full)} rows; "
        "cannot attach 'Run #' reliably."
    )

df_full['Run #'] = df_with_run['Run #']
df_full.to_csv('gdrive/MyDrive/dataset_with_residuals.csv', index=False)
