In [4]:
import os

# Optimize CPU Usage
os.environ["LOKY_MAX_CPU_COUNT"] = "16"

In [7]:
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from pytorch_tabnet.tab_model import TabNetClassifier


# Load Preprocessed Dataset
df = pd.read_csv("ProcessedData.csv")

# Split into Features & Labels
X = df.drop(columns=['SepsisLabel'])
y = df['SepsisLabel']

# Apply SMOTE for Class Balancing (Increase Sepsis Cases to 60%)
smote = SMOTE(sampling_strategy=0.6, random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Split into Train & Test Sets
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.2, stratify=y_resampled, random_state=42
)

print("\nAfter SMOTE Balancing:")
print(pd.Series(y_resampled).value_counts(normalize=True) * 100)


# Optimized TabNet Model
tabnet = TabNetClassifier(
    optimizer_fn=torch.optim.AdamW,
    optimizer_params=dict(lr=0.000400988413449954, weight_decay=0.0013603924614597604),
    scheduler_params={"step_size":10, "gamma":0.9},  
    scheduler_fn=torch.optim.lr_scheduler.StepLR,
    n_d=24,
    n_a=56,
    n_steps=3,
    gamma=1.2,
    mask_type="sparsemax",
    device_name='cuda'
)

print("\nTraining TabNet Model...")

# Train the Model with Optimized Settings
tabnet.fit(
    X_train=X_train.values, y_train=y_train,
    eval_set=[(X_test.values, y_test)],
    eval_metric=["auc"],
    max_epochs=200,
    patience=20,
    batch_size=1024,
    virtual_batch_size=256
)

print("\nModel Training Completed!")

# Save the Updated Model
tabnet.save_model("tabnet_optuna")
print("\nOptimized Model Saved Successfully!")



After SMOTE Balancing:
SepsisLabel
0    62.500005
1    37.499995
Name: proportion, dtype: float64

Training TabNet Model...




epoch 0  | loss: 0.60833 | val_0_auc: 0.79257 |  0:02:17s
epoch 1  | loss: 0.50219 | val_0_auc: 0.8334  |  0:04:38s
epoch 2  | loss: 0.45859 | val_0_auc: 0.85195 |  0:07:02s
epoch 3  | loss: 0.43371 | val_0_auc: 0.85836 |  0:09:10s
epoch 4  | loss: 0.41918 | val_0_auc: 0.85631 |  0:11:16s
epoch 5  | loss: 0.4102  | val_0_auc: 0.86561 |  0:13:17s
epoch 6  | loss: 0.40464 | val_0_auc: 0.86576 |  0:15:21s
epoch 7  | loss: 0.39943 | val_0_auc: 0.82165 |  0:17:31s
epoch 8  | loss: 0.39571 | val_0_auc: 0.86307 |  0:19:35s
epoch 9  | loss: 0.39196 | val_0_auc: 0.8708  |  0:21:33s
epoch 10 | loss: 0.38867 | val_0_auc: 0.87594 |  0:23:31s
epoch 11 | loss: 0.38688 | val_0_auc: 0.86861 |  0:25:29s
epoch 12 | loss: 0.38431 | val_0_auc: 0.7725  |  0:27:26s
epoch 13 | loss: 0.382   | val_0_auc: 0.85446 |  0:29:23s
epoch 14 | loss: 0.38053 | val_0_auc: 0.88066 |  0:31:32s
epoch 15 | loss: 0.3784  | val_0_auc: 0.8806  |  0:33:33s
epoch 16 | loss: 0.37583 | val_0_auc: 0.85122 |  0:35:33s
epoch 17 | los




Model Training Completed!
Successfully saved model at tabnet_optuna.zip

Optimized Model Saved Successfully!


In [8]:
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score

# Predict on test set
y_pred = tabnet.predict(X_test.values)

# Print evaluation metrics
print(f"Final Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print(f"AUC-ROC Score: {roc_auc_score(y_test, y_pred):.4f}")
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Final Accuracy: 0.8462
AUC-ROC Score: 0.8356

Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.88      0.88    292567
           1       0.80      0.79      0.79    175540

    accuracy                           0.85    468107
   macro avg       0.84      0.84      0.84    468107
weighted avg       0.85      0.85      0.85    468107



In [17]:
import pandas as pd
import numpy as np
import torch
from pytorch_tabnet.tab_model import TabNetClassifier
import os

# ✅ Load Feature Names from Training Data
training_data_path = "ProcessedData.csv"
df_train = pd.read_csv(training_data_path)
feature_columns = [col for col in df_train.columns if col != 'SepsisLabel']

# ✅ Check GPU Availability
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"\nUsing Device: {device.upper()}")

# ✅ Load Trained TabNet Model
def load_tabnet_model(model_path="tabnet_sepsis_optimized.zip"):
    model = TabNetClassifier(device_name=device)  
    model.load_model(model_path)
    print("\nTabNet Model Loaded Successfully!")
    return model

# ✅ Load & Preprocess New Data
def load_new_data(file_path="Synthetic_TestData.csv"):
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Error: {file_path} not found!")

    # Load new patient data
    df = pd.read_csv(file_path)

    # Ensure all expected columns exist and are in correct order
    missing_features = set(feature_columns) - set(df.columns)
    extra_features = set(df.columns) - set(feature_columns)

    if missing_features:
        raise ValueError(f"Missing features in test data: {missing_features}")
    if extra_features:
        print(f"Warning: Extra features detected: {extra_features}. These will be ignored.")
        df = df[feature_columns]  # Keep only the expected features

    print(f"\nNew Data Loaded: {df.shape[0]} samples for prediction.")
    return df

# ✅ Run Predictions
def predict_sepsis(model, new_data):
    X_test = new_data.values
    y_pred_prob = model.predict_proba(X_test)[:, 1]

    # Probability Analysis
    print("\nProbability Analysis:")
    print(f"  - Min Probability: {y_pred_prob.min():.4f}")
    print(f"  - Max Probability: {y_pred_prob.max():.4f}")
    print(f"  - Mean Probability: {y_pred_prob.mean():.4f}")
    print(f"  - Sample Probabilities: {y_pred_prob[:20]}")  # Print more samples

    # ✅ Dynamic Thresholding based on probability distribution
    threshold = np.percentile(y_pred_prob, 70)  # Adjust based on distribution

    print(f"\n🔍 Dynamic Classification Threshold: {threshold:.4f}")

    # Apply dynamic threshold
    y_pred = (y_pred_prob > threshold).astype(int)

    # Add Predictions to DataFrame
    new_data["Sepsis_Prediction"] = y_pred
    new_data["Sepsis_Probability"] = y_pred_prob

    # Class Distribution
    print("\n🔍 Class Distribution in Predictions:")
    print(new_data["Sepsis_Prediction"].value_counts(normalize=True) * 100)

    print("\nPredictions Completed!")
    return new_data

# ✅ Save Predictions to "Results.csv"
def save_predictions(predictions, output_file="Results.csv"):
    predictions.to_csv(output_file, index=False)
    print(f"\nResults saved in '{output_file}'.")

# ✅ Execute Inference Pipeline
if __name__ == "__main__":
    model = load_tabnet_model("tabnet_sepsis_optimized.zip")  
    new_data = load_new_data("Synthetic_TestData.csv")  
    predictions = predict_sepsis(model, new_data)  
    save_predictions(predictions, "Results.csv")



Using Device: CUDA

TabNet Model Loaded Successfully!

New Data Loaded: 50 samples for prediction.

Probability Analysis:
  - Min Probability: 0.0128
  - Max Probability: 0.9953
  - Mean Probability: 0.3965
  - Sample Probabilities: [0.5268043  0.5452135  0.1396937  0.16169927 0.17242578 0.48984024
 0.8126659  0.26892436 0.5107759  0.43663484 0.4324939  0.79088503
 0.9952865  0.2570397  0.29459655 0.2514579  0.25212032 0.37399843
 0.52422655 0.01279105]

🔍 Dynamic Classification Threshold: 0.4839

🔍 Class Distribution in Predictions:
Sepsis_Prediction
0    70.0
1    30.0
Name: proportion, dtype: float64

Predictions Completed!

Results saved in 'Results.csv'.




In [19]:
import pandas as pd
import numpy as np
import torch
from pytorch_tabnet.tab_model import TabNetClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
import os

# ✅ Set Device
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"\nUsing Device: {device.upper()}")

# ✅ Load Processed Dataset
data_path = "ProcessedData.csv"
df = pd.read_csv(data_path)

# ✅ Split into Features & Labels
X = df.drop(columns=['SepsisLabel'])
y = df['SepsisLabel']

# ✅ Apply SMOTE for Class Balancing
smote = SMOTE(sampling_strategy=0.6, random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# ✅ Split into Train & Test Sets (80/20)
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.2, stratify=y_resampled, random_state=42
)

# ✅ Load Trained TabNet Model
def load_tabnet_model(model_path="tabnet_optuna.zip"):
    model = TabNetClassifier(device_name=device)
    model.load_model(model_path)
    print("\nTabNet Model Loaded Successfully!")
    return model

# ✅ Run Predictions
def predict_sepsis(model, X_test):
    y_pred_prob = model.predict_proba(X_test.values)[:, 1]

    # Convert probabilities to binary predictions
    y_pred = (y_pred_prob > 0.50).astype(int)

    return y_pred, y_pred_prob

# ✅ Load Model & Predict
model = load_tabnet_model("tabnet_optuna.zip")
y_pred, y_pred_prob = predict_sepsis(model, X_test)

# ✅ Evaluate Model Performance
accuracy = accuracy_score(y_test, y_pred)
auc_roc = roc_auc_score(y_test, y_pred_prob)
class_report = classification_report(y_test, y_pred)

print(f"\nFinal Accuracy: {accuracy:.4f}")
print(f"AUC-ROC Score: {auc_roc:.4f}")
print("\nClassification Report:\n", class_report)

# ✅ Save Predictions to CSV
results_df = X_test.copy()
results_df["SepsisLabel"] = y_test.values
results_df["Sepsis_Prediction"] = y_pred
results_df["Sepsis_Probability"] = y_pred_prob

results_df.to_csv("Test_Results.csv", index=False)
print("\nTest Results Saved in 'Test_Results.csv'")



Using Device: CUDA





TabNet Model Loaded Successfully!

Final Accuracy: 0.8462
AUC-ROC Score: 0.9131

Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.88      0.88    292567
           1       0.80      0.79      0.79    175540

    accuracy                           0.85    468107
   macro avg       0.84      0.84      0.84    468107
weighted avg       0.85      0.85      0.85    468107


Test Results Saved in 'Test_Results.csv'
