In [None]:
import pandas as pd
import numpy as np
from numpy import random
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import networkx as nx
from sklearn.metrics import classification_report, accuracy_score, log_loss,average_precision_score,confusion_matrix,ConfusionMatrixDisplay
import pickle


import plotly.express as px
import seaborn as sns

#used for calling models from other notebooks
from joblib import load

In [None]:
# Both inputs are read as comma-separated files whose first row holds the column names.
csv_options = {"sep": ",", "header": 0}
final_features = pd.read_csv("total_features", **csv_options)
total_data = pd.read_csv("fraud_payment_data", **csv_options)

In [None]:
# Keep only transactions with a strictly positive USD amount, then renumber
# the surviving rows from 0 so the index is contiguous again.
has_positive_amount = total_data["USD_amount"] > 0
total_data = total_data.loc[has_positive_amount].reset_index(drop=True)
total_data


In [None]:
# USD_amount was not included in the saved modelling features, so copy it over
# from the raw data. The assignment aligns on the index, and the later split
# slices both frames by position, so the two frames must hold the same rows.
assert len(final_features) == len(total_data), (
    f"final_features has {len(final_features)} rows but total_data has "
    f"{len(total_data)}; USD_amount and labels would be misaligned"
)
final_features["USD_amount"] = total_data["USD_amount"]

In [None]:
# Positional train / validation / test split: the first TRAIN_END rows train the
# scaler, the next block is validation, everything after VAL_END is the test set.
TRAIN_END = 1_000_000
VAL_END = 1_250_000

train_features = final_features[:TRAIN_END]
val_features = final_features[TRAIN_END:VAL_END]
# Slice through to the end: an end index of -1 would silently drop the last row.
test_features = final_features[VAL_END:]

y_train = total_data['Label'][:TRAIN_END]
y_val = total_data['Label'][TRAIN_END:VAL_END]
y_test = total_data['Label'][VAL_END:]

# Features and labels are paired by position, so each split must match in length.
assert len(test_features) == len(y_test), "test features and labels differ in length"

# Fit the scaler on TRAIN ONLY so validation/test statistics never leak into it.
# NOTE(review): final_features now also contains USD_amount, so it is scaled too;
# confirm the saved model was trained with the same column set.
scaler = StandardScaler()
X_train = scaler.fit_transform(train_features)

# Transform val/test with the SAME fitted scaler. X_test is required by the
# model-scoring cells below and was previously never created.
X_val = scaler.transform(val_features)
X_test = scaler.transform(test_features)

In [None]:
# Load the model bundle saved by Fast_Fraud_Screening_Model.ipynb.
# NOTE(review): pickle.load can execute arbitrary code from the file; only open
# bundles that come from a trusted source.
with open('xgboost_fraud_model.pkl', 'rb') as model_file:
    model_package = pickle.load(model_file)

# The bundle is a mapping holding the fitted estimator and its column names.
xgb_model, feature_names = (
    model_package[key] for key in ('model', 'feature_names')
)

# Wrap the scaled test matrix in a DataFrame labelled with the model's feature names.
df_test = pd.DataFrame(data=X_test, columns=feature_names)

                                               Business KPIs

In [None]:
# Assemble the evaluation frame: scaled test features plus the true label, the
# transaction amount taken from test_features, and the model's score.
# The score is the second column returned by predict_proba.
fraud_probability = xgb_model.predict_proba(X_test)[:, 1]

df_test = pd.DataFrame(X_test, columns=feature_names).assign(
    is_fraud=y_test.values,
    USD_amount=test_features["USD_amount"].values,
    model_score=fraud_probability,
)

df_test

In [None]:
# Sweep candidate score cut-offs and report how much fraud each one would catch.
# NOTE(review): the cut-off is picked using the test split here - confirm that
# this is intended rather than tuning on the validation split.
fraud_mask = df_test["is_fraud"] == 1
total_fraud_cases = df_test["is_fraud"].sum()
candidate_thresholds = [0.12, 0.14, 0.15, 0.16, 0.18, .20, .30, .40, .50]

print("Evaluating threshold options:")
for thresh in candidate_thresholds:
    caught = (fraud_mask & (df_test["model_score"] >= thresh)).sum()
    recall = caught / total_fraud_cases
    print(f"Threshold {thresh}: Recall = {recall:.2%}, Caught = {caught}")


# Operating point used by the cells that follow; model_flag is 1 for flagged rows.
threshold = 0.18
df_test["model_flag"] = df_test["model_score"].ge(threshold).astype(int)

# Recall at the selected operating point.
caught = (fraud_mask & df_test["model_flag"].eq(1)).sum()
recall = caught / total_fraud_cases

print(f"\nSelected threshold: {threshold} (Recall: {recall:.2%})")


                                               Synthetic Loss, Missed Risk and Loss Avoided

In [None]:
# Dollar-value view of the model at the selected threshold.
#   synthetic_loss : USD_amount * is_fraud (the amount at stake on fraud rows)
#   missed_risk    : synthetic_loss on rows the model did not flag
#   loss_avoided   : synthetic_loss on rows the model flagged
fraud_amount = df_test["USD_amount"].mul(df_test["is_fraud"])
flag = df_test["model_flag"]

df_test["synthetic_loss"] = fraud_amount
df_test["missed_risk"] = fraud_amount.mul(1 - flag)
df_test["loss_avoided"] = fraud_amount.mul(flag)

# Column totals over the whole test frame.
total_loss, missed_risk, loss_avoided = (
    df_test[column].sum()
    for column in ("synthetic_loss", "missed_risk", "loss_avoided")
)

# Shares of the total fraud amount.
missed_risk_pct = missed_risk / total_loss
loss_avoided_pct = loss_avoided / total_loss


print(f"Total Loss: ${total_loss:,.2f}")
print(f"Loss Avoided: ${loss_avoided:,.2f} ({loss_avoided_pct:.1%})")
print(f"Missed Risk: ${missed_risk:,.2f} ({missed_risk_pct:.1%})")

In [None]:
# Compare the mean transaction amount of frauds the model flagged against the
# mean amount of frauds it let through (fraud rows only).
fraud_rows = df_test["is_fraud"] == 1
caught_frauds = fraud_rows & (df_test["model_flag"] == 1)
missed_frauds = fraud_rows & (df_test["model_flag"] == 0)

caught_fraud_avg = df_test.loc[caught_frauds, "USD_amount"].mean()
missed_fraud_avg = df_test.loc[missed_frauds, "USD_amount"].mean()
value_ratio = caught_fraud_avg / missed_fraud_avg

print("\n=== Fraud Case Characteristics ===")
print(f"Avg $ per caught fraud: ${caught_fraud_avg:.2f}")
print(f"Avg $ per missed fraud: ${missed_fraud_avg:.2f}")
print(f"Value ratio (caught/missed): {value_ratio:.2f}x")

In [None]:
# Case-count summary at the selected threshold.
fraud_rows = df_test["is_fraud"] == 1
flagged_rows = df_test["model_flag"] == 1
unflagged_rows = df_test["model_flag"] == 0

total_fraud_cases = df_test["is_fraud"].sum()
caught_fraud_cases = (fraud_rows & flagged_rows).sum()
missed_fraud_cases = (fraud_rows & unflagged_rows).sum()

# Precision: share of flagged cases that are fraud (0 when nothing is flagged).
total_flagged = df_test["model_flag"].sum()
if total_flagged > 0:
    precision = caught_fraud_cases / total_flagged
else:
    precision = 0

print("\n=== Model Performance Summary ===")
for label, count in [
    ("Total cases flagged", total_flagged),
    ("Total fraud cases", total_fraud_cases),
    ("Caught fraud cases", caught_fraud_cases),
    ("Missed fraud cases", missed_fraud_cases),
]:
    print(f"{label}: {count:,}")
# `recall` is the value computed when the threshold was selected.
print(f"Recall: {recall:.2%}")
print(f"Precision: {precision:.2%}")

In [None]:
# Visualise the dollar impact of the model at the selected threshold.
# The y-axis is labelled in millions, so convert the raw USD totals before
# plotting; otherwise the tick values and the label disagree by a factor of 1e6.
USD_PER_MILLION = 1e6

fig, ax = plt.subplots()
ax.bar(
    ["Loss avoided", "Missed risk"],
    [loss_avoided / USD_PER_MILLION, missed_risk / USD_PER_MILLION],
    color=["#42A2B9", "crimson"],
)
ax.set_title("Model Performance on Fraud Losses")
ax.set_ylabel("USD (Millions)")
plt.show()


## Alert Cost Simulation – The Cost of Analysts Reviewing Alerts

Cases vary in complexity, but this estimate acknowledges a balanced mix of simple, moderate, and complex cases.

- Our model flags fraud and leads to an initial triage: 5-15 min to check basic patterns.
- Simple false positives are cleared quickly.
- Complex cases are escalated to a stricter model or to an analyst and may take several hours to days.

Realistically, the estimated hourly rate per analyst ranges from:

- \$5 – \$15 for FinTech firms
- \$16 – \$25 for mid-size banks
- \$26 – \$70 for larger banks

**Assumptions:**  
- Standard analyst workday: 8 hours 
- Average review time: 15 minutes per case

**Cost Estimate:**  
Since our synthetic data comes from **JPMorgan Chase**, we use an average cost for larger banks (\$50).

- Average analyst hourly rate: **\$50/hour**
- Average review time: **15 minutes** per case

**Synthetic Average Review Cost:**

$$
\$50/\text{hour} \times \frac{15\ \text{minutes}}{60\ \text{minutes/hour}} = \$12.50\ \text{per case}
$$

In [None]:
# Review-cost assumptions used by the operational-cost cells below.
hourly_rate = 50  # analyst cost in $/hour
# Expressed in minutes, as the name says (it previously held 0.25, i.e. hours).
avg_review_time_minutes = 15  # 15 minutes per case (weighted average)

MINUTES_PER_HOUR = 60

# Cost of one review = hourly rate x fraction of an hour spent on the case.
synthetic_avg_review_cost = hourly_rate * avg_review_time_minutes / MINUTES_PER_HOUR

print(f"Revised average review cost per case: ${synthetic_avg_review_cost:.2f}")


In [None]:
# Charge one analyst review for every flagged row (model_flag is 0 or 1, so
# unflagged rows get a cost of 0).
df_test["review_cost"] = df_test["model_flag"].mul(synthetic_avg_review_cost)

# Total operational cost and the number of alerts behind it.
total_review_cost = df_test["review_cost"].sum()
cases_flagged = df_test["model_flag"].sum()

print("\n=== Operational Costs ===")
print(f"Cases flagged for review: {cases_flagged:,}")
print(f"Cost per case review: ${synthetic_avg_review_cost:.2f}")
print(f"Total review cost: ${total_review_cost:,.2f}")

In [None]:
# False positive rate: share of legitimate transactions that were flagged.
legit_rows = df_test["is_fraud"] == 0
flagged_rows = df_test["model_flag"] == 1

total_legit = legit_rows.sum()
false_positives = (legit_rows & flagged_rows).sum()
fpr = false_positives / total_legit

# False negative rate: share of frauds missed, i.e. the complement of recall.
fnr = 1 - recall

print("\n=== Error Rates ===")
print(f"False Positive Rate: {fpr:.2%} ({false_positives:,} false alarms)")
print(f"False Negative Rate: {fnr:.2%} (missed {missed_fraud_cases:,} frauds)")

In [None]:

# Threshold sensitivity analysis: detection quality, alert volume and financial
# impact across a grid of score cut-offs, summarised in a 2x2 figure.
#
# Changes from the earlier version of this cell:
#   * the sweep runs once (it used to be computed twice, the first result discarded)
#   * the "current" marker follows the notebook's selected `threshold` instead of a
#     hard-coded 0.16 that disagreed with it
#   * the per-case review cost comes from `synthetic_avg_review_cost`, not a literal
#   * the notebook-level `recall` / `precision` values are no longer overwritten
#   * table rows are computed directly, so off-grid cut-offs (0.15, 0.25) appear


def _threshold_metrics(frame, thresh, review_cost_per_case):
    """Return performance and cost metrics for flagging ``model_score >= thresh``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Needs the columns ``model_score``, ``is_fraud`` and ``synthetic_loss``.
    thresh : float
        Score cut-off; rows scoring at or above it count as flagged.
    review_cost_per_case : float
        Dollar cost charged for every flagged row.

    Returns
    -------
    dict
        ``threshold``; ``flagged_pct``, ``recall`` and ``precision`` in percent;
        ``lift`` (recall divided by the flagged share); ``loss_avoided``,
        ``review_cost`` and ``net_benefit`` in millions of dollars.
    """
    flagged_rows = frame["model_score"] >= thresh
    n_flagged = flagged_rows.sum()
    flagged_share = n_flagged / len(frame)

    n_caught = ((frame["is_fraud"] == 1) & flagged_rows).sum()
    recall_at = n_caught / frame["is_fraud"].sum()
    precision_at = n_caught / n_flagged if n_flagged > 0 else 0
    lift_at = recall_at / flagged_share if flagged_share > 0 else 0

    loss_avoided_at = frame.loc[flagged_rows, "synthetic_loss"].sum()
    review_cost_at = n_flagged * review_cost_per_case

    return {
        'threshold': thresh,
        'flagged_pct': flagged_share * 100,
        'recall': recall_at * 100,
        'precision': precision_at * 100,
        'lift': lift_at,
        'loss_avoided': loss_avoided_at / 1e6,
        'review_cost': review_cost_at / 1e6,
        'net_benefit': (loss_avoided_at - review_cost_at) / 1e6,
    }


# Operating point and unit cost chosen earlier in the notebook.
current_threshold = float(threshold)
review_cost_per_case = synthetic_avg_review_cost
current_label = f"Current ({current_threshold:.2f})"

# 0.10, 0.12, ..., 0.48. Built from integers so the grid values are exact; the
# current threshold is added so it always has a row of its own.
sweep_grid = np.arange(10, 50, 2) / 100
thresholds = np.unique(np.append(sweep_grid, current_threshold))

results = [_threshold_metrics(df_test, t, review_cost_per_case) for t in thresholds]
df_threshold = pd.DataFrame(results)

is_current_row = np.isclose(df_threshold['threshold'], current_threshold)
current = df_threshold.loc[is_current_row].iloc[0]

#Create visualizations
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(14, 10))

#Plot 1: Recall vs % Flagged
ax1.plot(df_threshold['threshold'], df_threshold['recall'], 'g-', linewidth=2, label='Recall')
ax1_twin = ax1.twinx()
ax1_twin.plot(df_threshold['threshold'], df_threshold['flagged_pct'], 'orange', linewidth=2, label='% Cases Flagged')
ax1.axvline(x=current_threshold, color='red', linestyle='--', alpha=0.7, label=current_label)
ax1.set_xlabel('Threshold')
ax1.set_ylabel('Recall (%)', color='g')
ax1_twin.set_ylabel('% Cases Flagged', color='orange')
ax1.set_title('Recall vs Operational Volume')
ax1.grid(True, alpha=0.3)
ax1.legend(loc='upper left')
ax1_twin.legend(loc='upper right')

#Plot 2: Precision vs Recall
ax2.plot(df_threshold['recall'], df_threshold['precision'], 'b-', linewidth=2)
ax2.scatter(current['recall'], current['precision'],
            color='red', s=200, marker='*', zorder=5, label=current_label)
ax2.set_xlabel('Recall (%)')
ax2.set_ylabel('Precision (%)')
ax2.set_title('Precision-Recall Trade-off')
ax2.grid(True, alpha=0.3)
ax2.legend()

#Plot 3: Financial Impact
ax3.plot(df_threshold['threshold'], df_threshold['loss_avoided'], 'g-', linewidth=2, label='Loss Avoided')
ax3.plot(df_threshold['threshold'], df_threshold['review_cost'], 'orange', linewidth=2, label='Review Cost')
ax3.plot(df_threshold['threshold'], df_threshold['net_benefit'], 'b--', linewidth=2, label='Net Benefit')
ax3.axvline(x=current_threshold, color='red', linestyle='--', alpha=0.7, label=current_label)
ax3.axhline(y=0, color='black', linestyle='-', alpha=0.3)
ax3.set_xlabel('Threshold')
ax3.set_ylabel('$ (Millions)')
ax3.set_title('Financial Impact by Threshold')
ax3.grid(True, alpha=0.3)
ax3.legend()

#Plot 4: Summary Table
# Rows are evaluated directly (not looked up on the sweep grid), and the current
# threshold is always included so its row can be highlighted.
key_thresholds = sorted({0.14, 0.15, 0.16, 0.18, 0.20, 0.25, 0.30, current_threshold})
table_data = []
for key_thresh in key_thresholds:
    row = _threshold_metrics(df_test, key_thresh, review_cost_per_case)
    table_data.append([
        f"{key_thresh:.2f}",
        f"{row['recall']:.1f}%",
        f"{row['precision']:.1f}%",
        f"{row['lift']:.2f}x",
        f"{row['flagged_pct']:.1f}%",
        f"${row['loss_avoided']:.2f}M",
        f"${row['net_benefit']:.2f}M"
    ])

ax4.axis('tight')
ax4.axis('off')
table = ax4.table(
    cellText=table_data,
    colLabels=['Threshold', 'Recall', 'Precision', 'Lift', '% Flagged', 'Loss Avoided', 'Net Benefit'],
    cellLoc='center',
    loc='center',
    colWidths=[0.10, 0.10, 0.10, 0.10, 0.12, 0.15, 0.15]
)
table.auto_set_font_size(False)
table.set_fontsize(8)
table.scale(1, 2)

#Highlight current threshold row (table row 0 is the header, hence i + 1)
for i, key_thresh in enumerate(key_thresholds):
    if np.isclose(key_thresh, current_threshold):
        for j in range(len(table_data[i])):
            table[(i + 1, j)].set_facecolor('#ffcccc')

ax4.set_title('Threshold Comparison Table', pad=20)
plt.tight_layout()
plt.savefig('threshold_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

#Print with lift added
print("\n=== Threshold Analysis Summary ===")
print(f"\nCurrent Threshold ({current_threshold:.2f}):")
print(f"  Recall: {current['recall']:.1f}%")
print(f"  Precision: {current['precision']:.1f}%")
print(f"  Lift: {current['lift']:.2f}x")
print(f"  % Flagged: {current['flagged_pct']:.1f}%")
print(f"  Loss Avoided: ${current['loss_avoided']:.2f}M")
print(f"  Review Cost: ${current['review_cost']:.2f}M")
print(f"  Net Benefit: ${current['net_benefit']:.2f}M")

#Optimal threshold: the swept cut-off with the highest net benefit
optimal_idx = df_threshold['net_benefit'].idxmax()
optimal = df_threshold.loc[optimal_idx]
print(f"\nOptimal Threshold (Max Net Benefit: {optimal['threshold']:.2f}):")
print(f"  Recall: {optimal['recall']:.1f}%")
print(f"  Precision: {optimal['precision']:.1f}%")
print(f"  Lift: {optimal['lift']:.2f}x")
print(f"  % Flagged: {optimal['flagged_pct']:.1f}%")
print(f"  Loss Avoided: ${optimal['loss_avoided']:.2f}M")
print(f"  Review Cost: ${optimal['review_cost']:.2f}M")
print(f"  Net Benefit: ${optimal['net_benefit']:.2f}M")