# Fraud Detection - Modeling (Refactored)

This notebook demonstrates a modular approach to fraud-detection modeling. Training and evaluation go through the `ModelTrainer` class so that the same procedure is applied consistently to each dataset.

In [None]:
# Standard library
import sys
import os

# Third-party. The package is named `pandas`; `pd` is only its conventional
# alias, so `import pd as pd` fails with ModuleNotFoundError.
import pandas as pd
import numpy as np

# Put the parent of the working directory on the import path so the local
# `scripts` package resolves. This must run before the `scripts.*` imports.
sys.path.append(os.path.abspath('../'))

from sklearn.model_selection import train_test_split
from scripts.imbalance_handler import ImbalanceHandler
from scripts.data_clean import DataCleaner
from scripts.modeling_utils import ModelTrainer

import warnings
warnings.filter_warnings('ignore')

## 1. Data Preparation & SMOTE

We load both datasets (the processed fraud data and the raw credit card data), perform stratified 80/20 train/test splits, and apply SMOTE to the training split only, leaving the test split untouched.

In [None]:
# Project helpers used throughout the notebook.
cleaner = DataCleaner()
handler = ImbalanceHandler()
trainer = ModelTrainer()

# ---------------------------------------------------------------------------
# Fraud dataset: load -> prepare -> stratified split -> resample training set
# ---------------------------------------------------------------------------
fraud_df = pd.read_csv("../data/processed/processed_data.csv")
fraud_df_ml = cleaner.prepare_for_modeling(fraud_df, target_col='class')

# Target is the lowercase 'class' column; everything else is a feature.
y_f = fraud_df_ml['class']
X_f = fraud_df_ml.drop(columns='class')

X_train_f, X_test_f, y_train_f, y_test_f = train_test_split(
    X_f,
    y_f,
    test_size=0.2,
    random_state=42,
    stratify=y_f,
)
# Only the training split is resampled; the test split is left as-is.
X_train_f_s, y_train_f_s = handler.resample_smote(X_train_f, y_train_f)

# ---------------------------------------------------------------------------
# Credit card dataset: load -> stratified split -> resample training set
# (read straight from the raw CSV; no prepare_for_modeling step here)
# ---------------------------------------------------------------------------
cc_df = pd.read_csv("../data/raw/creditcard.csv")

# Target is the capitalised 'Class' column in this dataset.
y_cc = cc_df['Class']
X_cc = cc_df.drop(columns='Class')

X_train_cc, X_test_cc, y_train_cc, y_test_cc = train_test_split(
    X_cc,
    y_cc,
    test_size=0.2,
    random_state=42,
    stratify=y_cc,
)
X_train_cc_s, y_train_cc_s = handler.resample_smote(X_train_cc, y_train_cc)

print("Data Preparation Complete.")

## 2. Baseline Model: Logistic Regression

In [None]:
# Fraud data: fit on the resampled training split, score on the untouched test split.
lr_f = trainer.train_logistic_regression(
    X_train_f_s,
    y_train_f_s,
)
res_lr_f = trainer.evaluate_model(
    lr_f,
    X_test_f,
    y_test_f,
    "LR Fraud",
)

# Credit card data: same procedure.
lr_cc = trainer.train_logistic_regression(
    X_train_cc_s,
    y_train_cc_s,
)
res_lr_cc = trainer.evaluate_model(
    lr_cc,
    X_test_cc,
    y_test_cc,
    "LR Credit Card",
)

## 3. Ensemble Model: LightGBM

In [None]:
# Fraud data: fit on the resampled training split, score on the untouched test split.
lgb_f = trainer.train_lightgbm(
    X_train_f_s,
    y_train_f_s,
)
res_lgb_f = trainer.evaluate_model(
    lgb_f,
    X_test_f,
    y_test_f,
    "LGBM Fraud",
)

# Credit card data: same procedure.
lgb_cc = trainer.train_lightgbm(
    X_train_cc_s,
    y_train_cc_s,
)
res_lgb_cc = trainer.evaluate_model(
    lgb_cc,
    X_test_cc,
    y_test_cc,
    "LGBM Credit Card",
)

## 4. Cross-Validation

In [None]:
# Cross-validate each LightGBM model on its full, un-resampled feature/target
# pair (X_* / y_* are the frames from before the train/test split).
# NOTE(review): the estimators passed in were already fitted on the resampled
# training data; whether perform_cross_validation refits or clones them is not
# visible from this notebook — confirm against scripts.modeling_utils.
trainer.perform_cross_validation(
    X_f,
    y_f,
    lgb_f,
    "LGBM Fraud",
)
trainer.perform_cross_validation(
    X_cc,
    y_cc,
    lgb_cc,
    "LGBM Credit Card",
)

## 5. Model Comparison

In [None]:
# Gather the evaluation results in a fixed order: logistic regression first,
# then LightGBM, each as (fraud, credit card).
results = [
    res_lr_f,
    res_lr_cc,
    res_lgb_f,
    res_lgb_cc,
]
comparison_df = trainer.compare_models(results)

# Bare last expression so the notebook renders the comparison table.
comparison_df