# Credit Card Fraud Detection - EDA and Preprocessing

This notebook explores the dataset, handles class imbalance, and prepares the data for modeling.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys

# Make the sibling ``../src`` directory importable. The path is resolved
# against the current working directory, and the membership check keeps
# sys.path free of duplicate entries when this cell is executed again.
_src_dir = os.path.abspath(os.path.join('..', 'src'))
if _src_dir not in sys.path:
    sys.path.append(_src_dir)

from data_loader import load_data, preprocess_data

# Apply the white-grid theme to every figure drawn below.
sns.set_theme(style="whitegrid")

In [None]:
# Location of the raw CSV, given as a relative path.
# NOTE(review): presumably resolved against the notebook's working directory — confirm
DATA_PATH = r"../../datasets/financial/credit_card_fraud/creditcard.csv"
# load_data and preprocess_data come from the project's data_loader module.
# NOTE(review): the cleaning steps are defined there, not here — presumably both return a DataFrame; confirm
df = load_data(DATA_PATH)
df = preprocess_data(df)
# Last expression of the cell, so the notebook renders the first rows of df.
df.head()

In [None]:
# Report the share of rows that fall into each 'Class' value
# (normalize=True yields fractions rather than raw counts).
print("Class Distribution:")
class_shares = df['Class'].value_counts(normalize=True)
print(class_shares)

# Bar chart of the raw row count per class.
plt.figure(figsize=(6, 4))
sns.countplot(data=df, x='Class')
plt.title('Class Distribution (0: Normal, 1: Fraud)')
plt.show()

In [None]:
# Log-transform the amounts before plotting: log1p(x) = log(1 + x), so an
# amount of zero maps to 0. The result is stored on df as 'Amount_Log' and
# therefore stays available to any cell that runs afterwards.
# NOTE(review): if preprocess_data rescales 'Amount' to values below -1,
# log1p returns NaN for those rows — confirm against data_loader.
df['Amount_Log'] = np.log1p(df['Amount'])

# One density histogram (plus KDE curve) per class on shared axes.
# common_norm=False normalises each class separately, so each histogram
# integrates to 1 regardless of how many rows the class has.
plt.figure(figsize=(10, 6))
sns.histplot(
    data=df,
    x='Amount_Log',
    hue='Class',
    bins=50,
    stat='density',
    kde=True,
    common_norm=False,
)
plt.title('Transaction Amount Distribution by Class (Log Scale)')
plt.show()