In [None]:
import os
import sys
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

sys.path.append("../src")

from preprocessing import load_unsw_data, preprocess_data
from selection import unsupervised_feature_selection, apply_pca
from sklearn.metrics import roc_auc_score,classification_report

In [None]:
# Stage 1: read the dataset from ../data/ and unpack the four values that
# preprocess_data() returns (presumably train/test features and labels —
# confirm against src/preprocessing.py).
print("[1] Loading UNSW-NB15 dataset...")

df = load_unsw_data("../data/")
(X_train, X_test, y_train, y_test) = preprocess_data(df)

print(f"[✔] Dataset loaded. Train: {X_train.shape}, Test: {X_test.shape}")

In [None]:
# Stage 2: feature selection followed by PCA.
print("\n[2] Performing Feature Selection and PCA...")

# Feature selection (variance threshold) is run on the training split only;
# the test split is then restricted to the same surviving columns.
X_train_fs = unsupervised_feature_selection(X_train)
kept_columns = X_train_fs.columns
X_test_fs = X_test[kept_columns]

# Project both splits with PCA for anomaly modeling. The third value returned
# by apply_pca() is not needed in this notebook and is discarded.
X_train_pca, X_test_pca, _ = apply_pca(
    X_train_fs,
    X_test_fs,
    n_components=15,
)

print(f"[✔] Final Feature Shape: {X_train_pca.shape}")

In [None]:
# Stage 3: load whichever persisted detectors are present on disk.
print("\n[3] Loading Models...")

models_dir = "../outputs/models/"

# Display name -> file name inside models_dir.
model_files = {
    "Isolation Forest": "isolation_forest.pkl",
    "One Class SVM": "one_class_svm.pkl",
}

# Display name -> loaded estimator; only models found on disk end up here.
models = {}
for name, file in model_files.items():
    path = os.path.join(models_dir, file)

    # A missing artifact is reported and skipped rather than treated as fatal.
    if not os.path.exists(path):
        print(f"[⚠] {file} not found. Skipping...")
        continue

    # NOTE(review): joblib.load unpickles the file, which can execute arbitrary
    # code — only load artifacts produced by this project.
    models[name] = joblib.load(path)
    print(f"[✔] Loaded {name}")

In [None]:
# Stage 4: score every loaded detector on the test split and collect AUC-ROC.
results = []

for name, model in models.items():
    # Use predict(), not fit_predict(): the estimators were loaded already
    # fitted, and fit_predict() would throw that fit away and retrain on the
    # test data, so the saved models would never actually be evaluated.
    raw_preds = model.predict(X_test_pca)

    # The detectors flag outliers as -1; remap to 1 = anomaly, 0 = normal.
    # Assumes y_test encodes attacks as 1 — TODO confirm against preprocess_data.
    preds = np.where(raw_preds == -1, 1, 0)

    # ROC AUC measures ranking quality, so it needs a continuous score rather
    # than thresholded labels. decision_function() is lower for more abnormal
    # samples, hence the negation. Fall back to the hard labels for estimators
    # that do not expose decision_function().
    if hasattr(model, "decision_function"):
        anomaly_scores = -np.ravel(model.decision_function(X_test_pca))
    else:
        anomaly_scores = preds
    auc_roc = roc_auc_score(y_test, anomaly_scores)

    print(f"\n=== {name} ===")
    print(classification_report(y_test, preds, digits=4))

    results.append({
        "Model": name,
        "AUC-ROC": auc_roc
    })

# Passing explicit columns keeps sort_values() from raising KeyError when no
# model file was found and `results` is empty.
results_df = (
    pd.DataFrame(results, columns=["Model", "AUC-ROC"])
    .sort_values(by="AUC-ROC", ascending=False)
    .reset_index(drop=True)
)
print("\n=== Summary ===")
display(results_df)

In [None]:
# Bar chart comparing the detectors by AUC-ROC, saved under ../outputs/figures/.
auc_fig_path = "../outputs/figures/model_auc-roc_comparison.png"
# savefig() does not create missing directories; make sure the target exists
# so the cell also works on a fresh checkout.
os.makedirs(os.path.dirname(auc_fig_path), exist_ok=True)

plt.figure(figsize=(9,5))
sns.barplot(x="Model", y="AUC-ROC", data=results_df, hue="Model", dodge=False)
plt.title("Model Comparison by AUC-ROC")
# Keep the zoomed-in 0.5-1.0 view when every model is at or above chance level;
# otherwise show the full 0-1 range so bars below 0.5 are not clipped away.
lowest_auc = results_df["AUC-ROC"].min()
plt.ylim(0.0 if lowest_auc < 0.5 else 0.5, 1.0)
plt.tight_layout()
plt.savefig(auc_fig_path, dpi=300)
plt.show()


In [None]:
# Scatter of the first two PCA components of the test split, coloured by label.
pca_fig_path = "../outputs/figures/pca_visualization.png"
# savefig() does not create missing directories; make sure the target exists
# so the cell also works on a fresh checkout.
os.makedirs(os.path.dirname(pca_fig_path), exist_ok=True)

plt.figure(figsize=(7,5))
# Positional column indexing assumes X_test_pca is a 2-D NumPy array — TODO
# confirm against apply_pca.
plt.scatter(X_test_pca[:, 0], X_test_pca[:, 1], c=y_test, cmap="coolwarm", s=8, alpha=0.6)
plt.title("PCA 2D Projection of Test Data (Normal vs Attack)")
plt.xlabel("PCA Component 1")
plt.ylabel("PCA Component 2")
plt.tight_layout()
plt.savefig(pca_fig_path)
plt.show()

In [None]:
# Plain-text recap of the summary table. `results` is the raw list of dicts and
# has no to_string(); the DataFrame built from it is `results_df`.
print("\n===Final Unsupervised Model Results ===")
print(results_df.to_string(index=False))
