In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import matplotlib.pyplot as pltx
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.metrics import roc_auc_score, f1_score, precision_score, recall_score, confusion_matrix, roc_curve, classification_report ,accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import resample

In [None]:
# Data path for train and test
X_path = '/content/drive/MyDrive/Cozii/ieee-fraud-detection-processed-data/X_resampled.csv'
y_path = '/content/drive/MyDrive/Cozii/ieee-fraud-detection-processed-data/y_resampled.csv'

In [None]:
# Load data
X = pd.read_csv(X_path)
y = pd.read_csv(y_path)

In [None]:
'''
Normilzation of X
    Min-Max Normizxation of X
'''

# Initialize MinMaxScaler
scaler = MinMaxScaler()

# Fit and transform the data
X_normalized = scaler.fit_transform(X)

# Convert back to DataFrame (optional, but often useful)
X_normalized = pd.DataFrame(X_normalized, columns=X.columns)

In [None]:
'''

subsample the data to 10% of the original size,
due to O(n^2) complexity of the OneClassSVM algorithm,

'''
# Define the subsampling size (10% of the data)
subsample_size = 0.1

# Subsample the data
X_subsampled, y_subsampled = resample(X_normalized, y, n_samples=int(len(X_normalized) * subsample_size), random_state=42) # Use random_state for reproducibility

In [None]:
# Split data into train and test sets with 80% tranining and 20% for testing
X_train, X_test, y_train, y_test = train_test_split(X_subsampled, y_subsampled, test_size=0.2, random_state=42, stratify=y_subsampled)
del X_normalized
del X_subsampled
del y_subsampled
del X
del y

In [None]:
# Ensure y is a 1D array by accessing the values of the 'isFraud' column
y_train = y_train['isFraud'].values.ravel() # Access 'isFraud' column and convert to NumPy array
y_test = y_test['isFraud'].values.ravel()   # Access 'isFraud' column and convert to NumPy array

In [None]:
# Initialize the model
oc_svm = OneClassSVM(kernel='rbf', gamma='scale', nu=0.05)  # `nu` controls anomaly rate

# Fit on training data
oc_svm.fit(X_train)

In [None]:
# Get anomaly scores (decision function)
y_pred_scores = oc_svm.decision_function(X_test)

# Convert to probability-like scores
y_pred_prob = -y_pred_scores  # Higher means fraud

# Convert to binary labels (adjust threshold if needed)
threshold = 0  # Default: If score < 0, mark as fraud
y_pred = (y_pred_scores < threshold).astype(int)

In [None]:
# Print evaluation metrics
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.49107260364114935
Classification Report:
               precision    recall  f1-score   support

           0       0.50      0.94      0.65     11405
           1       0.41      0.04      0.08     11390

    accuracy                           0.49     22795
   macro avg       0.45      0.49      0.36     22795
weighted avg       0.45      0.49      0.36     22795



In [None]:
# Compute Metrics
accuracy = accuracy_score(y_test, y_pred)
auc_score = roc_auc_score(y_test, y_pred_prob)
f1 = f1_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

# Print Metrics
print(f"AUC-ROC Score: {auc_score:.4f}")
print(f"F1-Score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print("Confusion Matrix:")
print(cm)

AUC-ROC Score: 0.4412
F1-Score: 0.0786
Precision: 0.4122
Recall: 0.0435
Confusion Matrix:
[[10699   706]
 [10895   495]]
