In [None]:
# Attach Google Drive to the Colab runtime so files on it are reachable by path
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, roc_auc_score
import numpy as np

In [None]:
# Locations of the feature matrix and label CSVs (both live in the same Drive folder)
DATA_DIR = '/content/drive/MyDrive/Cozii/ieee-fraud-detection-processed-data'
X_path = f'{DATA_DIR}/X_resampled.csv'
y_path = f'{DATA_DIR}/y_resampled.csv'

In [None]:
# Read the features (X) and labels (y) CSVs into DataFrames, in that order
X, y = map(pd.read_csv, (X_path, y_path))

In [None]:
# Hold out 20% of the rows for testing and train on the remaining 80%.
# stratify=y asks for the same label proportions in both splits; the fixed
# random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# The full frames are no longer needed once the splits exist; drop the references
del X, y

In [None]:
# Replace each label DataFrame with a flat 1D NumPy array of its 'isFraud' column.
# The (y_train, y_test) tuple is built before either name is rebound, so both
# conversions read the original DataFrames.
y_train, y_test = (
    labels['isFraud'].values.ravel()
    for labels in (y_train, y_test)
)

In [None]:
# Isolation Forest hyperparameters, gathered in one place
iso_forest_params = {
    'n_estimators': 100,       # number of isolation trees in the ensemble
    'contamination': 'auto',   # use scikit-learn's built-in threshold, not a user-specified outlier fraction
    'random_state': 42,        # fixed seed so the fitted trees are reproducible
}
iso_forest = IsolationForest(**iso_forest_params)

# Fit on the training features only; the labels are not passed to fit().
# NOTE(review): the recorded classification report shows roughly 50/50 class
# support on the stratified test split, so fraud does not look "rare" in this
# resampled data -- confirm that Isolation Forest's few-anomalies assumption
# is acceptable here.
iso_forest.fit(X_train)

In [None]:
# Score the held-out set: IsolationForest.predict labels each row
# -1 (anomaly) or 1 (normal)
raw_labels = iso_forest.predict(X_test)

# Recode to the target convention: anomaly -> 1 (fraud), normal -> 0
y_pred = (raw_labels == -1).astype(int)

In [None]:
# Per-class precision / recall / F1 for the binary predictions on the test set
report = classification_report(y_test, y_pred)
print("Classification Report:\n", report)

Classification Report:
               precision    recall  f1-score   support

           0       0.51      0.99      0.67    113976
           1       0.80      0.04      0.08    113975

    accuracy                           0.51    227951
   macro avg       0.65      0.51      0.37    227951
weighted avg       0.65      0.51      0.37    227951



In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, accuracy_score

# Threshold-dependent metrics: computed from the hard 0/1 predictions
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# ROC AUC is a ranking metric and needs a continuous score. Feeding it the
# hard 0/1 labels collapses the ROC curve to a single operating point, so the
# value degenerates to balanced accuracy. decision_function() is lower for
# more anomalous rows, so negate it to get "higher = more fraud-like".
fraud_scores = -iso_forest.decision_function(X_test)
auc = roc_auc_score(y_test, fraud_scores)

# Print results
print("Model Performance Metrics:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
print(f"AUC-Score: {auc:.4f}")

Model Performance Metrics:
Accuracy: 0.5147
Precision: 0.7964
Recall: 0.0396
F1-Score: 0.0755
AUC-Score: 0.5147
