# 🎯 Phase 4: Threshold Tuning and Final Evaluation

In this notebook, we tune the decision threshold used for predicting fraud.  
The default `0.5` threshold often underperforms on imbalanced classification problems.

We aim to:
- Visualize Precision, Recall, and F1 across thresholds
- Choose a threshold that matches real-world risk tolerance
- Finalize model performance metrics

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_curve, f1_score
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's "whitegrid" style to subsequent plots.
sns.set(style="whitegrid")

# Read the scored results written by Phase 3 (alternatively, the model can be
# recomputed instead of loading this file).
# NOTE(review): `df` is not referenced by the later cells shown in this
# notebook — confirm whether this load is still needed.
df = pd.read_csv(filepath_or_buffer="../data/isoforest_scored.csv")

In [3]:
import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import (
    precision_recall_curve, f1_score, confusion_matrix, classification_report
)
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's "whitegrid" style to subsequent plots.
sns.set(style="whitegrid")

# Restore the artifacts persisted on disk: the scores, the test features and
# labels, and the saved model.
# NOTE(review): only `y_test` and `y_probs` are used by the threshold-tuning
# cell below; `clf` and `X_test` are not referenced in the cells shown.
y_probs = np.load(file="../data/y_probs.npy")
X_test = pd.read_csv(filepath_or_buffer="../data/X_test.csv")
y_test = (
    pd.read_csv(filepath_or_buffer="../data/y_test.csv")
    .squeeze()  # collapse a single-column frame into a Series
)
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load model files that come from a trusted source.
clf = joblib.load(filename="../models/rf_model.joblib")

In [4]:
# Sweep the decision threshold, plot precision / recall / F1 against it, then
# evaluate the F1-maximising threshold and save both figures and the report
# under ../assets.
# NOTE(review): the threshold is selected and evaluated on the same
# y_test / y_probs, so the saved metrics are likely optimistic — consider
# choosing the threshold on a separate validation split.
import os

os.makedirs("../assets", exist_ok=True)

# --- Metric curves ----------------------------------------------------------
precision, recall, thresholds = precision_recall_curve(y_test, y_probs)
# precision/recall carry one more entry than thresholds; pad thresholds with
# 1.0 so all three curves can share one x-axis.
thresholds = np.append(thresholds, 1.0)
# The 1e-8 term keeps the division defined when precision + recall == 0.
f1_scores = (2 * (precision * recall)) / (precision + recall + 1e-8)

curve_fig, curve_ax = plt.subplots(figsize=(10, 6))
for series, series_label in (
    (precision, "Precision"),
    (recall, "Recall"),
    (f1_scores, "F1 Score"),
):
    curve_ax.plot(thresholds, series, label=series_label)
# Dashed marker at the conventional 0.5 cut-off for visual comparison.
curve_ax.axvline(x=0.5, color="gray", linestyle="--", label="Default Threshold")
curve_ax.set(
    xlabel="Threshold",
    ylabel="Score",
    title="Precision / Recall / F1 vs Threshold",
)
curve_ax.legend()
curve_fig.tight_layout()
curve_fig.savefig("../assets/threshold_vs_metrics.png")
# Closing the figure keeps it from being rendered inline; it is only saved.
plt.close(curve_fig)

# --- Evaluation at the F1-optimal threshold ---------------------------------
best_idx = f1_scores.argmax()
best_threshold = thresholds[best_idx]
# Scores at or above the chosen threshold are labelled 1, everything else 0.
y_thresh_pred = (y_probs >= best_threshold).astype(int)

cm_fig, cm_ax = plt.subplots(figsize=(4, 3))
sns.heatmap(
    confusion_matrix(y_test, y_thresh_pred),
    annot=True,
    fmt="d",
    cmap="Oranges",
    ax=cm_ax,
)
cm_ax.set(
    title=f"Confusion Matrix @ Threshold {best_threshold:.2f}",
    xlabel="Predicted",
    ylabel="Actual",
)
cm_fig.tight_layout()
cm_fig.savefig("../assets/final_confusion_matrix_thresh.png")
plt.close(cm_fig)

# --- Persist the classification report (4 decimal places) -------------------
with open("../assets/final_report_threshold_tuned.txt", "w") as report_file:
    report_file.write(classification_report(y_test, y_thresh_pred, digits=4))

print(f"✅ Phase 4 complete. Threshold tuned to {best_threshold:.4f}")

✅ Phase 4 complete. Threshold tuned to 0.2800
