In [None]:
import pandas as pd
import seaborn as sns
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.linear_model import LogisticRegressionCV
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
# Read the CSV into a DataFrame, then display its (rows, columns) tuple.
DATA_PATH = "/content/balanced_data.csv"
df = pd.read_csv(DATA_PATH)
df.shape

In [None]:
# Print pandas' concise summary of the frame: columns, non-null counts, dtypes and memory usage.
df.info()

In [None]:
# Separate the label from the predictors: y is the "FLAG" column,
# X is every remaining column of df.
y = df["FLAG"]
X = df.drop(columns="FLAG")

In [None]:
# Hold out 20% of the rows for evaluation.
# stratify=y keeps the FLAG class proportions identical in the train and test
# partitions, so the reported metrics are not skewed by an unlucky draw;
# random_state pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import ElasticNetCV
from sklearn.ensemble import StackingClassifier
import shap
import lime
import lime.lime_tabular
import eli5
from eli5.sklearn import PermutationImportance
import matplotlib.pyplot as plt

# Tuned hyperparameters for the Random Forest.
# The search reported min_samples_leaf (1.73...) and min_samples_split (14.05...)
# as floats greater than 1. scikit-learn interprets floats for these two
# parameters as fractions of the sample count (so they may not exceed 1.0);
# absolute counts must be integers. Passing the raw floats makes fit() reject
# the parameters, so they are converted to integer counts here.
# NOTE(review): round() picks the nearest count (2 and 14). If the tuning code
# truncated with int() when scoring candidates, switch to int(...) so the refit
# matches the configuration that was actually evaluated.
rf_params = {
    'max_depth': 10,
    'max_features': 0.3139538085100205,
    'min_samples_leaf': max(1, round(1.7307339350609041)),
    'min_samples_split': max(2, round(14.052804288439336)),
    'n_estimators': 123,
}
# random_state makes the bootstrap / feature sampling reproducible across runs.
best_rf = RandomForestClassifier(random_state=42, **rf_params)

# Tuned hyperparameters for XGBoost (already valid types: ints for counts/depth,
# fractions in (0, 1] for the sampling ratios).
xgb_params = {
    'colsample_bytree': 0.9729370974093524,
    'learning_rate': 0.48726788904554486,
    'max_depth': 3,
    'min_child_weight': 4,
    'n_estimators': 105,
    'subsample': 0.894412699905483,
}
# random_state pins the row/column subsampling driven by subsample/colsample_bytree.
best_xgb = XGBClassifier(random_state=42, **xgb_params)

# Feed-forward network used as the neural base learner.
def create_model():
    """Build and compile the dense binary-classification network.

    Architecture: two hidden Dense layers of 128 ReLU units followed by a
    single sigmoid output unit. The input width is taken from the column
    count of the module-level ``X_train``.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, binary
        cross-entropy loss, accuracy metric).
    """
    n_features = X_train.shape[1]
    network = Sequential([
        Dense(units=128, activation='relu', input_shape=(n_features,)),
        Dense(units=128, activation='relu'),
        Dense(units=1, activation='sigmoid'),
    ])
    network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return network

class KerasBinaryClassifier(ClassifierMixin, BaseEstimator):
    """Minimal scikit-learn adapter around a compiled Keras binary classifier.

    StackingClassifier clones every base estimator and requires each one to be
    a scikit-learn classifier. A raw Keras model offers neither get_params()
    nor predict_proba(), so it cannot be used directly; this adapter supplies
    the estimator API (parameters via BaseEstimator, ``classes_``, ``predict``
    and ``predict_proba``).

    Parameters
    ----------
    build_fn : callable
        Zero-argument factory returning a freshly compiled Keras model with a
        single sigmoid output unit. It is invoked on every ``fit`` so that
        cross-validation folds never share weights.
    epochs : int
        Number of training epochs passed to the Keras ``fit`` call.
    batch_size : int
        Mini-batch size passed to the Keras ``fit`` call.
    verbose : int
        Keras logging verbosity during training.
    """

    def __init__(self, build_fn=None, epochs=30, batch_size=32, verbose=0):
        # scikit-learn's clone() requires __init__ to store arguments verbatim.
        self.build_fn = build_fn
        self.epochs = epochs
        self.batch_size = batch_size
        self.verbose = verbose

    def fit(self, X, y):
        """Build a new network and train it on (X, y); returns self.

        Raises ValueError if no build_fn was given or y is not binary.
        """
        if self.build_fn is None:
            raise ValueError("build_fn must be a callable returning a compiled Keras model")
        # Map arbitrary binary labels onto {0, 1} and remember the originals.
        self.classes_, encoded = np.unique(np.asarray(y), return_inverse=True)
        if self.classes_.size != 2:
            raise ValueError(
                "KerasBinaryClassifier supports exactly 2 classes, got %d" % self.classes_.size
            )
        self.model_ = self.build_fn()
        self.model_.fit(
            np.asarray(X, dtype="float32"),
            encoded.astype("float32"),
            epochs=self.epochs,
            batch_size=self.batch_size,
            verbose=self.verbose,
        )
        return self

    def predict_proba(self, X):
        """Return an (n_samples, 2) array: P(classes_[0]), P(classes_[1])."""
        positive = self.model_.predict(np.asarray(X, dtype="float32"), verbose=0).reshape(-1)
        return np.column_stack([1.0 - positive, positive])

    def predict(self, X):
        """Return the predicted label (from classes_) using a 0.5 threshold."""
        positive = self.predict_proba(X)[:, 1]
        return self.classes_[(positive >= 0.5).astype(int)]


# Wrap the network factory so the stack can clone and refit it per CV fold.
# NOTE(review): epochs/batch_size are placeholders; the original never set a
# training schedule for the network - replace with the tuned values.
best_nn = KerasBinaryClassifier(build_fn=create_model, epochs=30, batch_size=32, verbose=0)

# Base learners of the stacking ensemble
estimators = [
    ('rf', best_rf),
    ('xgb', best_xgb),
    ('nn', best_nn),
]

# Elastic-net meta-learner. StackingClassifier requires final_estimator to be a
# classifier; ElasticNetCV is a regressor and is rejected at fit time. The
# classification counterpart is logistic regression with an elastic-net penalty
# (only the 'saga' solver supports it), with the L1/L2 mix chosen by CV.
meta_learner = LogisticRegressionCV(
    penalty='elasticnet',
    solver='saga',
    l1_ratios=[0.1, 0.5, 0.9],
    max_iter=5000,
    random_state=42,
)
stacking_model = StackingClassifier(estimators=estimators, final_estimator=meta_learner)

# Train the stacking model
stacking_model.fit(X_train, y_train)

# Evaluate the stacking model on the held-out split
y_pred = stacking_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Column order the stacking model was trained with.
feature_names = X_train.columns.tolist()


def predict_proba_named(data):
    """Return stacking_model.predict_proba for array-like or DataFrame input.

    SHAP and LIME hand the prediction callback plain arrays; the model was fit
    on a DataFrame, so the rows are re-wrapped with the training column names
    before predicting.
    """
    frame = pd.DataFrame(np.asarray(data), columns=feature_names)
    return stacking_model.predict_proba(frame)


def predict_positive(data):
    """Return only the positive-class (second column) probability per row."""
    return predict_proba_named(data)[:, 1]


# Explain feature importance using SHAP.
# shap.Explainer cannot introspect a StackingClassifier directly (it is not a
# supported tree/linear model and the estimator object is not callable), so a
# prediction function plus background data is passed and the model-agnostic
# permutation algorithm is requested explicitly.
# Background and explained rows are capped at 100 each to keep the run time
# manageable - raise the caps for a more precise estimate.
background = X_train.sample(n=min(100, len(X_train)), random_state=42)
X_explain = X_test.iloc[:100]
explainer = shap.Explainer(predict_positive, background, algorithm="permutation")
# The permutation algorithm needs at least 2 * n_features + 1 evaluations per row.
shap_explanation = explainer(X_explain, max_evals=max(500, 2 * len(feature_names) + 1))
shap_values = shap_explanation.values
shap.summary_plot(shap_values, X_explain, plot_type="bar")

# Explain feature importance using LIME
lime_explainer = lime.lime_tabular.LimeTabularExplainer(
    X_train.values,
    feature_names=feature_names,
    class_names=['0', '1'],
    mode='classification',
)
# explain_instance expects a 1-D numpy row, not a pandas Series.
lime_exp = lime_explainer.explain_instance(
    X_test.iloc[0].values,
    predict_proba_named,
    num_features=10,
)
lime_exp.show_in_notebook()

# Explain feature importance using ELI5
perm = PermutationImportance(stacking_model, random_state=42).fit(X_test, y_test)
eli5.show_weights(perm, feature_names=feature_names)

In [None]:
# Calculate evaluation metrics for the stacking model's test-set predictions.
# precision/recall/F1 use scikit-learn's binary defaults (positive label = 1).
acc = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
# NOTE(review): the probability-based metrics below stay disabled because they
# reference `best_xgb_clf`, which is not defined in the cells shown here.
#auroc = roc_auc_score(y_test, best_xgb_clf.predict_proba(X_test)[:, 1])
#mcc = matthews_corrcoef(y_test, y_pred)
#brier_score = brier_score_loss(y_test, best_xgb_clf.predict_proba(X_test)[:, 1])
cm = confusion_matrix(y_test, y_pred)

# Print evaluation metrics
print(f"Accuracy: {acc}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
#print(f"AUROC: {auroc}")
#print(f"MCC: {mcc}")
#print(f"Brier Score: {brier_score}")
print(f"Confusion Matrix:\n{cm}")

# Plot Confusion Matrix Heatmap (rows = true labels, columns = predictions).
# An explicit Axes is used so the labels and title attach to this figure only.
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False, ax=ax)
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("Confusion Matrix")
plt.show()