## Data Transformation

##### Setup and Imports

In [1]:
import pandas as pd
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from src.transformation import (
    encode_categorical,
    scale_features,
    apply_smote,
    split_data
)

from src.data_loader import load_data

# Read the input table through the project's loader. The path is relative
# to the notebook's working directory (one level above it, under data/).
df = load_data("../data/fraud_with_features.csv")


##### Separate features and target

In [2]:
# The label lives in the 'class' column; pull it out first, then keep every
# remaining column as the feature matrix. `drop` returns a new frame, so
# `df` itself still contains the label afterwards.
y = df['class']
X = df.drop(columns='class')

##### Encode categoricals

In [3]:
# Encode the feature frame with the project helper; only X is passed, so the
# target `y` is not touched by this step.
# NOTE(review): encoding runs on the full dataset, before the train/test
# split below. If encode_categorical learns anything from the data (category
# frequencies, target statistics, ...), it should be fit on the training
# split only — confirm against src/transformation.
X_encoded = encode_categorical(X)

##### Train-test split

In [4]:
# Partition the encoded features and the target into train and test parts.
# NOTE(review): no test size, stratification flag or random seed is passed
# here, so all of them come from split_data's defaults — confirm they are
# fixed there, otherwise this notebook is not reproducible run-to-run.
X_train, X_test, y_train, y_test = split_data(X_encoded, y)

##### Handle class imbalance with SMOTE

In [None]:
# Rebalance the training split only: X_test / y_test are deliberately not
# passed, so the held-out data is left as split_data produced it.
# NOTE(review): presumably SMOTE oversampling, judging by the helper's name;
# its sampling strategy and random seed are not visible here — confirm.
X_train_resampled, y_train_resampled = apply_smote(X_train, y_train)

##### Scale features

In [6]:
# Scale the resampled training features. scale_features hands back two
# things: the scaled training frame and the scaler object that produced it
# (presumably already fitted on X_train_resampled — confirm in
# src/transformation).
X_train_scaled, scaler = scale_features(X_train_resampled)

# Apply the same scaler to the held-out features with transform() only, so
# the test split is never used to (re)fit the scaling parameters.
X_test_scaled = pd.DataFrame(
    scaler.transform(X_test),
    columns=X_test.columns
)
# Restore X_test's row labels. Without this the new frame carries a fresh
# 0..n-1 index while y_test keeps the labels from the split, so any
# index-aligned pandas operation between the two (concat, boolean masks,
# arithmetic) would silently pair the wrong rows. Assigning the index after
# construction relabels rows positionally and never reindexes the data.
X_test_scaled.index = X_test.index

##### Save transformed datasets

In [7]:
# Persist the four artefacts of this notebook as CSV, in a fixed order.
# index=False drops the row labels, so downstream readers rely purely on
# row order to pair each feature file with its target file.
for _frame, _path in (
    (X_train_scaled, "../data/X_train_scaled.csv"),
    (X_test_scaled, "../data/X_test_scaled.csv"),
    (y_train_resampled, "../data/y_train_resampled.csv"),
    (y_test, "../data/y_test.csv"),
):
    _frame.to_csv(_path, index=False)

# NOTE(review): the message says "/data", but the files above are written
# to "../data" relative to the notebook's working directory.
print("Data transformation complete. Files saved to /data.")

Data transformation complete. Files saved to /data.
