In [None]:
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# --- Random Forest baseline -------------------------------------------------
# Fit a 100-tree forest (fixed seed) on the processed training features and
# predict labels for the processed test features in one chained call.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_preds = rf_model.fit(X_train_processed, y_train).predict(X_test_processed)

# Text summary: per-class report followed by overall accuracy.
rf_report = classification_report(y_test, rf_preds)
rf_test_accuracy = accuracy_score(y_test, rf_preds)
print("Random Forest Performance:", rf_report, sep="\n")
print("Accuracy:", rf_test_accuracy)

# Heatmap of the confusion matrix (cells annotated with integer counts).
# confusion_matrix(y_true, y_pred) puts actual labels on rows and predicted
# labels on columns, which is what the axis labels below describe.
rf_cm = confusion_matrix(y_test, rf_preds)
plt.figure(figsize=(6, 4))
rf_ax = sns.heatmap(rf_cm, annot=True, fmt='d', cmap='Blues')
rf_ax.set(
    title='Random Forest Confusion Matrix',
    xlabel='Predicted',
    ylabel='Actual',
)
plt.show()

# XGBoost Model
# Train a 100-round gradient-boosted classifier with a fixed seed, using log
# loss as the evaluation metric, then predict on the processed test features.
# NOTE: the `use_label_encoder` argument is intentionally not passed; it is
# deprecated in recent XGBoost releases, where supplying it only produces a
# "parameter not used" warning. Labels are presumably already encoded as
# 0..n_classes-1 upstream -- confirm against the preprocessing step.
xgb_model = XGBClassifier(n_estimators=100, eval_metric='logloss', random_state=42)
xgb_model.fit(X_train_processed, y_train)
xgb_preds = xgb_model.predict(X_test_processed)

# Text summary: per-class report followed by overall accuracy.
print("\nXGBoost Performance:")
print(classification_report(y_test, xgb_preds))
print("Accuracy:", accuracy_score(y_test, xgb_preds))

# Confusion matrix for XGBoost, drawn as an annotated heatmap
# (rows = actual labels, columns = predicted labels).
plt.figure(figsize=(6, 4))
sns.heatmap(confusion_matrix(y_test, xgb_preds), annot=True, fmt='d', cmap='Greens')
plt.title('XGBoost Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

from sklearn.metrics import precision_recall_fscore_support

# Side-by-side summary of both models on the test split.
# Precision, recall and F1 are support-weighted averages across classes
# (average='weighted'); the fourth returned value (support) is not needed.

# Evaluate Random Forest
rf_precision, rf_recall, rf_f1, _ = precision_recall_fscore_support(y_test, rf_preds, average='weighted')
rf_accuracy = accuracy_score(y_test, rf_preds)

# Evaluate XGBoost
xgb_precision, xgb_recall, xgb_f1, _ = precision_recall_fscore_support(y_test, xgb_preds, average='weighted')
xgb_accuracy = accuracy_score(y_test, xgb_preds)

# Organize metrics into a DataFrame: one row per metric, one column per model
metrics_df = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1-score'],
    'Random Forest': [rf_accuracy, rf_precision, rf_recall, rf_f1],
    'XGBoost': [xgb_accuracy, xgb_precision, xgb_recall, xgb_f1],
})

# Format metrics as percentage strings with two decimals (e.g. 0.9132 -> "91.32%").
# Uses per-column Series.map instead of DataFrame.applymap, which is
# deprecated in pandas >= 2.1; this form behaves the same on older versions.
percent_columns = ['Random Forest', 'XGBoost']
metrics_df[percent_columns] = metrics_df[percent_columns].apply(
    lambda column: column.map("{:.2%}".format)
)

# Display the DataFrame as a plain-text table without the row index
print(metrics_df.to_string(index=False))