In [1]:
%pip install imbalanced-learn

Collecting imbalanced-learn
  Downloading imbalanced_learn-0.12.2-py3-none-any.whl.metadata (8.2 kB)
Downloading imbalanced_learn-0.12.2-py3-none-any.whl (257 kB)
   ---------------------------------------- 0.0/258.0 kB ? eta -:--:--
   ---------------------------------------- 0.0/258.0 kB ? eta -:--:--
   - -------------------------------------- 10.2/258.0 kB ? eta -:--:--
   ---- ---------------------------------- 30.7/258.0 kB 330.3 kB/s eta 0:00:01
   --------- ----------------------------- 61.4/258.0 kB 544.7 kB/s eta 0:00:01
   ---------------------------------------  256.0/258.0 kB 1.7 MB/s eta 0:00:01
   ---------------------------------------- 258.0/258.0 kB 1.6 MB/s eta 0:00:00
Installing collected packages: imbalanced-learn
Successfully installed imbalanced-learn-0.12.2
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.svm import OneClassSVM
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, roc_auc_score
from imblearn.over_sampling import SMOTE

In [3]:
# Load the dataset
data = pd.read_csv('..\Dataset\Aggregated_Sleep.csv')

# Removing unnecessary columns
data_cleaned = data.drop(['patient_id', 'window_start'], axis=1)

# Handle missing values by imputing with median
imputer = SimpleImputer(strategy='median')
data_cleaned[data_cleaned.columns] = imputer.fit_transform(data_cleaned)

# Split data into features and target
X = data_cleaned.drop('agitation', axis=1)
y = data_cleaned['agitation']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Apply SMOTE to the training data
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Scaling the data
scaler = StandardScaler()
X_train_smote_scaled = scaler.fit_transform(X_train_smote)
X_test_scaled = scaler.transform(X_test)

# One-class SVM model trained on SMOTE data
svm_model_smote = OneClassSVM(kernel='rbf', gamma='auto').fit(X_train_smote_scaled)

# Isolation Forest model trained on SMOTE data
iso_forest_model_smote = IsolationForest(n_estimators=100, contamination='auto', random_state=42).fit(X_train_smote_scaled)

# Making predictions with SMOTE-trained models
svm_preds_smote = svm_model_smote.predict(X_test_scaled)
iso_forest_preds_smote = iso_forest_model_smote.predict(X_test_scaled)

# Convert predictions from -1, 1 to 0, 1
svm_preds_smote = (svm_preds_smote == -1).astype(int)
iso_forest_preds_smote = (iso_forest_preds_smote == -1).astype(int)

# Evaluate models trained with SMOTE
print("Evaluation with SMOTE:")
print("One-class SVM Report:\n", classification_report(y_test, svm_preds_smote))
print("ROC-AUC for SVM:", roc_auc_score(y_test, svm_preds_smote))
print("Isolation Forest Report:\n", classification_report(y_test, iso_forest_preds_smote))
print("ROC-AUC for Isolation Forest:", roc_auc_score(y_test, iso_forest_preds_smote))

Evaluation with SMOTE:
One-class SVM Report:
               precision    recall  f1-score   support

         0.0       0.96      0.27      0.42       271
         1.0       0.02      0.57      0.04         7

    accuracy                           0.28       278
   macro avg       0.49      0.42      0.23       278
weighted avg       0.94      0.28      0.41       278

ROC-AUC for SVM: 0.42040063257775434
Isolation Forest Report:
               precision    recall  f1-score   support

         0.0       0.97      0.85      0.91       271
         1.0       0.02      0.14      0.04         7

    accuracy                           0.83       278
   macro avg       0.50      0.49      0.47       278
weighted avg       0.95      0.83      0.88       278

ROC-AUC for Isolation Forest: 0.4939377965208223
