In [1]:
# Attach Google Drive to the Colab runtime so files stored on Drive can be read.
from google.colab import drive

MOUNT_POINT = '/content/drive'
drive.mount(MOUNT_POINT)

Mounted at /content/drive


In [2]:
import pandas as pd
import matplotlib.pyplot as pltx
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.metrics import roc_auc_score, f1_score, precision_score, recall_score, confusion_matrix, roc_curve, classification_report ,accuracy_score
from sklearn.model_selection import train_test_split

In [3]:
# Locations of the feature (X) and label (y) CSV files on the mounted Drive.
# Both files live in the same directory, so the prefix is written only once.
DATA_DIR = '/content/drive/MyDrive/Cozii/ieee-fraud-detection-processed-data'
X_path = f'{DATA_DIR}/X_resampled.csv'
y_path = f'{DATA_DIR}/y_resampled.csv'

In [4]:
# Read the feature table and the label table from their CSV files (X first, then y).
X, y = (pd.read_csv(csv_path) for csv_path in (X_path, y_path))

In [5]:
# Hold out 20% of the rows for testing and keep 80% for training.
# Stratifying on y keeps the class ratio the same in both partitions, and the
# fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# The unsplit tables are no longer needed; drop the references to them.
del X, y

In [6]:
# Flatten the label tables into 1-D NumPy arrays of the 'isFraud' values.
# The conversion is guarded so this cell can be re-run safely: once y_train /
# y_test have been replaced by arrays, indexing them with 'isFraud' would raise.
def _labels_to_1d(labels, column='isFraud'):
    """Return `labels` as a flat NumPy array.

    Parameters
    ----------
    labels : pandas.DataFrame or array-like
        A DataFrame is reduced to the values of `column`; anything else
        (e.g. an array produced by an earlier run of this cell) is only
        flattened.
    column : str
        Name of the label column to extract from a DataFrame.

    Returns
    -------
    numpy.ndarray
        One-dimensional array of labels.
    """
    if isinstance(labels, pd.DataFrame):
        labels = labels[column]
    return np.asarray(labels).ravel()


y_train = _labels_to_1d(y_train)
y_test = _labels_to_1d(y_test)

In [None]:
# Initialize the model.
# `nu` is an upper bound on the fraction of training points treated as errors
# (outliers) and a lower bound on the fraction of support vectors.
oc_svm = OneClassSVM(kernel='rbf', gamma='scale', nu=0.05)

# A one-class SVM learns the boundary of a single "normal" class, so it must be
# fitted on legitimate transactions only (isFraud == 0). Fitting on the mixed
# training set would teach the model that fraud rows are normal as well.
# np.asarray(...).ravel() leaves an already flat label array unchanged.
is_legit_train = np.asarray(y_train).ravel() == 0
oc_svm.fit(X_train.loc[is_legit_train])

In [None]:
# Decision-function value for every test row; the model treats negative values
# as outliers and positive values as inliers.
y_pred_scores = oc_svm.decision_function(X_test)

# Cut-off applied to the raw scores (adjust if needed).
threshold = 0

# Hard labels: a row scoring below the threshold is marked as fraud (1), else 0.
y_pred = np.less(y_pred_scores, threshold).astype(int)

# Sign-flipped scores so that a higher value means "more likely fraud".
# These are ranking scores, not calibrated probabilities.
y_pred_prob = np.negative(y_pred_scores)

In [None]:
# Overall accuracy, followed by the per-class precision / recall / F1 table.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

In [None]:
# AUC is computed from the continuous scores; the other metrics are computed
# from the hard 0/1 predictions.
auc_score = roc_auc_score(y_test, y_pred_prob)
f1, precision, recall = (
    metric(y_test, y_pred)
    for metric in (f1_score, precision_score, recall_score)
)
cm = confusion_matrix(y_test, y_pred)

# Emit one line per metric, then the confusion matrix.
print(
    f"AUC-ROC Score: {auc_score:.4f}",
    f"F1-Score: {f1:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall: {recall:.4f}",
    "Confusion Matrix:",
    cm,
    sep="\n",
)