# 🛡️ Fraud Buster ML

This notebook demonstrates a full pipeline for detecting fraudulent transactions using e-commerce data. The core logic is abstracted in `utils.py` for reusability and modularity.

In [None]:
from utils import load_data, clean_data, feature_engineer, balance_and_scale, train_models, evaluate_model, plot_roc, plot_feature_importance
import pandas as pd
from sklearn.model_selection import train_test_split


## 1. Load and Clean Data

In [None]:
# load_data() yields two objects, which clean_data() combines into the single
# frame `df` used by every later cell.
# NOTE(review): presumably two raw transaction DataFrames read inside utils.py;
# the file paths and cleaning rules are not visible here - confirm in utils.py.
df1, df2 = load_data()
df = clean_data(df1, df2)
# Last expression of the cell, so the notebook displays the cleaned frame's shape.
df.shape


## 2. Feature Engineering

In [None]:
# Rebind `df` to the output of utils.feature_engineer; the transformations
# themselves are defined in utils.py and are not visible from this notebook.
# The result must still carry the 'Is Fraudulent' column, which the split
# cell uses as the label.
df = feature_engineer(df)
# Last expression of the cell: preview the first rows of the engineered frame.
df.head()


## 3. Split Data

In [None]:
# Pull the label out first, then keep every remaining column as a predictor.
y = df['Is Fraudulent']
X = df.drop(columns=['Is Fraudulent'])

# Hold out 20% of the rows for testing. `stratify=y` asks scikit-learn to keep
# the class proportions of `y` in both splits, and the fixed `random_state`
# makes the shuffle reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


## 4. Balance (SMOTE) and Scale Data

In [None]:
# Training split: keep both return values of balance_and_scale (features and
# labels), since the helper may change the number of rows when it balances.
X_train_bal, y_train_bal = balance_and_scale(X_train, y_train)
# Test split: only the first return value is kept; the labels returned by the
# helper are discarded and the original y_test is used by the later cells.
# NOTE(review): this is the very same helper call as for the training split,
# with no argument that disables resampling. Unless balance_and_scale detects
# test data on its own, it presumably also resamples X_test and fits a fresh
# scaler on it, which would (a) leave X_test_scaled with a different row count
# than y_test and (b) scale test data differently from the training data.
# Confirm against utils.py; the usual fix is to fit the scaler on the training
# split only and apply just its transform to X_test.
X_test_scaled, _ = balance_and_scale(X_test, y_test)


## 5. Train Models

In [None]:
# Train on the balanced training split. The result is indexed by model name
# further down (models['Random Forest']), so it behaves like a name -> model
# mapping.
# NOTE(review): which estimators are trained is decided in utils.py - confirm there.
models = train_models(X_train_bal, y_train_bal)


## 6. Evaluate Models

In [None]:
# Evaluate every trained model on the held-out split. The whole `models`
# collection is passed, not a single estimator.
# NOTE(review): the metrics reported are defined in utils.py. X_test_scaled and
# y_test must have the same number of rows for them to be meaningful.
evaluate_model(models, X_test_scaled, y_test)


## 7. ROC Curves

In [None]:
# Draw ROC curves for the trained models using the same held-out features and
# labels as the evaluation cell.
# NOTE(review): presumably one curve per entry in `models` - confirm in utils.py.
plot_roc(models, X_test_scaled, y_test)


## 8. Feature Importance (Random Forest Only)

In [None]:
# Only the 'Random Forest' entry of `models` is plotted. Feature names come
# from X.columns, i.e. the columns of the unscaled frame built before the split.
# NOTE(review): assumes balance_and_scale keeps the column order of X, so these
# names line up with the model's importances - confirm in utils.py.
plot_feature_importance(models['Random Forest'], X.columns)
