# EDA - Credit Card Data

This notebook performs exploratory data analysis on the credit card fraud dataset.


In [None]:
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Make the project's src/ directory importable. The path is resolved relative
# to the current working directory, so this assumes the notebook is run from a
# folder that sits next to src/ (e.g. notebooks/) — TODO confirm.
sys.path.append(str(Path.cwd().parent / "src"))

# Must come after the sys.path change above; presumably preprocessor.py lives
# in src/ — verify against the repository layout.
from preprocessor import PreprocessingPipeline

# Plot defaults for the rest of the notebook: seaborn "whitegrid" style and a
# default figure size of (12, 6).
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 6)
# IPython magic (not plain Python): render matplotlib figures inline in the
# notebook output.
%matplotlib inline


## Process Credit Card Data

Run the complete preprocessing pipeline on the credit card data, with EDA, class-imbalance handling, and saving of the processed output enabled:


In [None]:
# Build the pipeline, pointing it at the raw-data and output directories
# (both given relative to the notebook's working directory).
pipeline = PreprocessingPipeline(data_dir="../data/raw", output_dir="../data/processed")

# Options for the credit card run, collected in one place so they are easy to
# tweak between executions of this cell.
credit_card_options = {
    "credit_card_file": "creditcard.csv",
    "target_column": "Class",
    "perform_eda": True,
    "handle_imbalance": True,
    "save_processed": True,
}

# Run the pipeline and summarise the outcome. Failures are reported in the
# cell output instead of being raised, so the notebook keeps running.
try:
    processed_df, metadata = pipeline.process_credit_card_data(**credit_card_options)
    print(f"\nFinal processed data shape: {processed_df.shape}")
    print(f"\nSteps completed: {metadata['steps_completed']}")
except FileNotFoundError as missing:
    # Most likely cause: the raw CSV has not been placed in the data folder.
    print(f"Data file not found: {missing}")
    print("Please ensure creditcard.csv is in the data/raw directory")
except Exception as err:
    # Catch-all for this top-level cell: show the message and the full
    # traceback so the failure can be diagnosed from the notebook output.
    import traceback

    print(f"Error in preprocessing: {err}")
    traceback.print_exc()
