In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Load the dataset
df = pd.read_csv("creditcard.csv")

# Separate features and target
X = df.drop("Class", axis=1)
y = df["Class"]

# Split into train and test sets (stratified) BEFORE fitting any
# preprocessing, so that held-out rows never contribute to the scaler's
# statistics (fitting on the full dataset leaks test information).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Normalize 'Time' and 'Amount': learn mean/std on the training rows only,
# then apply the same fitted transform to the test rows.
scale_cols = ["Time", "Amount"]
scaler = StandardScaler()

# Work on explicit copies so the column assignment below does not write
# into (or warn about) slices of X.
X_train = X_train.copy()
X_test = X_test.copy()
X_train[scale_cols] = scaler.fit_transform(X_train[scale_cols])
X_test[scale_cols] = scaler.transform(X_test[scale_cols])

# Re-attach the labels to the training features so that rows can be
# filtered and sampled together with their target value.
train_data = pd.concat([X_train, y_train], axis=1)

# Partition the training rows by label: 0 is treated as the majority
# class and 1 as the minority class.
is_majority = train_data["Class"] == 0
is_minority = train_data["Class"] == 1
majority = train_data.loc[is_majority]
minority = train_data.loc[is_minority]

# Draw, without replacement, as many majority rows as there are minority
# rows (seeded for repeatability).
n_minority = minority.shape[0]
majority_downsampled = resample(
    majority,
    replace=False,
    n_samples=n_minority,
    random_state=42,
)

# Stack the sampled majority rows on top of all minority rows.
train_downsampled = pd.concat([majority_downsampled, minority])

# Separate the balanced frame back into features and target.
X_train_down = train_downsampled.drop(columns="Class")
y_train_down = train_downsampled["Class"]

# Fit a 100-tree random forest (fixed seed) on the downsampled training set.
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_down, y_train_down
)

# Score the test split, which was not resampled.
y_pred = clf.predict(X_test)

# Report per-class precision/recall/F1 and the raw confusion counts.
report = classification_report(y_test, y_pred)
matrix = confusion_matrix(y_test, y_pred)
print("Classification Report:\n", report)
print("Confusion Matrix:\n", matrix)


Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.96      0.98     56864
           1       0.04      0.92      0.08        98

    accuracy                           0.96     56962
   macro avg       0.52      0.94      0.53     56962
weighted avg       1.00      0.96      0.98     56962

Confusion Matrix:
 [[54827  2037]
 [    8    90]]
