In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

df = pd.read_csv("creditcard_small.csv")
print(df["Class"].value_counts())  

X = df.drop("Class", axis=1)
y = df["Class"]

scaler = StandardScaler()
X[["Time", "Amount"]] = scaler.fit_transform(X[["Time", "Amount"]])

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

Class
0    4508
1     492
Name: count, dtype: int64


In [3]:
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report

iso_forest = IsolationForest(contamination=0.05, random_state=42)
y_pred_if = iso_forest.fit_predict(X_test)

y_pred_if = [1 if x == -1 else 0 for x in y_pred_if]

print("Isolation Forest:\n", classification_report(y_test, y_pred_if))

Isolation Forest:
               precision    recall  f1-score   support

           0       0.94      0.99      0.96       902
           1       0.80      0.41      0.54        98

    accuracy                           0.93      1000
   macro avg       0.87      0.70      0.75      1000
weighted avg       0.93      0.93      0.92      1000



In [4]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
y_pred_clf = clf.predict(X_test)

print("Random Forest:\n", classification_report(y_test, y_pred_clf))

Random Forest:
               precision    recall  f1-score   support

           0       0.99      1.00      0.99       902
           1       1.00      0.89      0.94        98

    accuracy                           0.99      1000
   macro avg       0.99      0.94      0.97      1000
weighted avg       0.99      0.99      0.99      1000



In [5]:
sample = X_test.iloc[0:1]
true_label = y_test.iloc[0]
predicted = clf.predict(sample)[0]

print(f"True Label: {true_label}, Predicted: {predicted}")

True Label: 0, Predicted: 0
