In [15]:
# Load the libraries!!!

import numpy as np
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import joblib
import os

# Simulate on a real-world data
from sklearn.datasets import fetch_covtype

In [2]:
# Fetch real data
print("Loading dataset...")
X, y = fetch_covtype(return_X_y=True)
X = X[:5000]  # subset for speed
y = y[:5000]

Loading dataset...


In [17]:
# Filter only "normal" samples (e.g., class 2 = normal)
# Train on class 2 only (assumed normal)
X_train = X[y == 2]
X_test = X
y_test = y

# Binary label: 1 = normal (class 2), -1 = anomaly (everything else)
y_true = np.where(y_test == 2, 1, -1)

In [18]:
# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [19]:
# Train One-Class SVM
print("Training One-Class SVM...")
model = OneClassSVM(kernel="rbf", nu=0.05, gamma='scale')
model.fit(X_train_scaled)

Training One-Class SVM...


In [20]:
# Predict
print("Evaluating model...")
y_pred = model.predict(X_test_scaled)

Evaluating model...


In [21]:
# Print metrics
print("\nConfusion Matrix:")
print(confusion_matrix(y_true, y_pred))
print("\nClassification Report:")
print(classification_report(y_true, y_pred, target_names=["Anomaly", "Normal"]))


Confusion Matrix:
[[2752 1300]
 [  52  896]]

Classification Report:
              precision    recall  f1-score   support

     Anomaly       0.98      0.68      0.80      4052
      Normal       0.41      0.95      0.57       948

    accuracy                           0.73      5000
   macro avg       0.69      0.81      0.69      5000
weighted avg       0.87      0.73      0.76      5000

