Installing required modules

In [None]:
import sys
import subprocess

def install(pkg):
    """Install ``pkg`` with pip, using the interpreter that runs this kernel.

    Running ``sys.executable -m pip`` targets the kernel's own environment
    rather than whichever ``pip`` happens to be first on PATH.

    Args:
        pkg: Requirement string handed to ``pip install`` unchanged.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", pkg]
    subprocess.check_call(pip_command)

# pip names of the packages that must be importable before the notebook continues.
packages = ["pandas"]

for p in packages:
    # The import name is taken to be the part of the pip name before the first "-".
    module_name = p.partition("-")[0]
    try:
        __import__(module_name)
    except ImportError:
        # Only reached when the module is missing; install it into this kernel's environment.
        print("Installing package:", p)
        install(p)

Importing modules

In [None]:
import numpy as np;
import pandas as pd;
import matplotlib.pyplot as plt;
from sklearn.metrics import accuracy_score, f1_score;
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve, auc
from collections import Counter


In [None]:
# Reference labels for the test set; the metric cells below compare every
# model's predictions against this array.
y_test = np.load("../results/y_test.npy")

# Saved test-set predictions, one array per classifier. The names on the left
# line up position-by-position with the files listed on the right.
(
    svm_test_pred,
    rf_test_pred,
    knn_test_pred,
    logreg_test_pred,
    gb_test_pred,
) = (
    np.load(pred_path)
    for pred_path in (
        "../results/svm_test_pred.npy",
        "../results/rf_test_pred.npy",
        "../results/knn_test_pred.npy",
        "../results/logreg_test_pred.npy",
        "../results/gb_test_pred.npy",
    )
)

Part A: Classical Machine Learning models.
- Store all model results

In [None]:
# One [model name, accuracy, macro-averaged F1] row per classifier, each scored
# against the same y_test reference labels.
results = [
    [
        model_label,
        accuracy_score(y_test, model_pred),
        f1_score(y_test, model_pred, average="macro"),
    ]
    for model_label, model_pred in (
        ("SVM", svm_test_pred),
        ("Random Forest", rf_test_pred),
        ("k-NN", knn_test_pred),
        ("Logistic Regression", logreg_test_pred),
        ("Gradient Boosting", gb_test_pred),
    )
]


Create comparison table

In [None]:
# Tabulate the rows collected in `results`; the bare name on the last line
# makes the notebook render the table.
metric_columns = ["Model", "Accuracy", "Macro F1-score"]
df_results = pd.DataFrame(data=results, columns=metric_columns)
df_results


Bar chart comparison

In [None]:
# Test accuracy per model, drawn through the explicit Axes interface.
fig, ax = plt.subplots(figsize=(9, 5))
ax.bar(df_results["Model"], df_results["Accuracy"])
ax.tick_params(axis="x", labelrotation=15)  # tilt model names so they do not overlap
ax.set_ylim(0, 1.05)  # small headroom above the maximum possible score of 1.0
ax.set_ylabel("Accuracy")
ax.set_title("Model Comparison – Test Accuracy")
ax.grid(axis="y", alpha=0.3)
plt.show()


F1 plot

In [None]:
# Macro F1-score per model, drawn through the explicit Axes interface.
fig, ax = plt.subplots(figsize=(9, 5))
ax.bar(df_results["Model"], df_results["Macro F1-score"])
ax.tick_params(axis="x", labelrotation=15)  # tilt model names so they do not overlap
ax.set_ylim(0, 1.05)  # small headroom above the maximum possible score of 1.0
ax.set_ylabel("Macro F1-score")
ax.set_title("Model Comparison – Macro F1 Score")
ax.grid(axis="y", alpha=0.3)
plt.show()


In [None]:
# Display name -> saved test-set predictions, in the order the plots are shown.
models = dict(
    [
        ("SVM", svm_test_pred),
        ("Random Forest", rf_test_pred),
        ("k-NN", knn_test_pred),
        ("Logistic Regression", logreg_test_pred),
        ("Gradient Boosting", gb_test_pred),
    ]
)

# One confusion-matrix figure per model; the title goes on the axes that the
# display object reports it drew on.
for name, preds in models.items():
    matrix_display = ConfusionMatrixDisplay.from_predictions(y_test, preds)
    matrix_display.ax_.set_title(f"{name} – Confusion Matrix")
    plt.show()


In [None]:


# Distinct labels present in the test set. The observed label values are used
# (rather than assuming they are exactly 0..n_classes-1) so the one-vs-rest
# split stays correct for non-contiguous or non-zero-based labels.
classes = np.unique(y_test)
n_classes = len(classes)

# One-vs-rest indicator matrices of shape (n_samples, n_classes). They are built
# by direct comparison because label_binarize returns a single column for
# two-class problems, which breaks the per-class column indexing below.
y_true_flat = np.asarray(y_test).ravel()
y_pred_flat = np.asarray(logreg_test_pred).ravel()
y_test_bin = (y_true_flat[:, None] == classes[None, :]).astype(int)
y_pred_bin = (y_pred_flat[:, None] == classes[None, :]).astype(int)

plt.figure(figsize=(7,6))

# NOTE(review): the "scores" passed to roc_curve are hard class predictions, so
# each curve has a single operating point between (0, 0) and (1, 1). If the
# classifier's probability outputs were saved, pass those instead.
for i, cls in enumerate(classes):
    fpr, tpr, _ = roc_curve(y_test_bin[:, i], y_pred_bin[:, i])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f"Class {cls} (AUC = {roc_auc:.2f})")

# Diagonal reference line: the curve of a classifier that guesses at random.
plt.plot([0,1], [0,1], 'k--')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curves (Logistic Regression – One vs Rest)")
plt.legend()
plt.show()


In [None]:
def _load_seconds(path):
    """Read a single timing value from a ``.npy`` file and return it as a float.

    The value is extracted with ``ndarray.item()`` because calling ``float()``
    directly on an array with ndim > 0 is deprecated since NumPy 1.25. This
    form works for both 0-d arrays and one-element arrays.

    Args:
        path: Location of the ``.npy`` file to read.

    Returns:
        The stored value converted to a Python float.

    Raises:
        ValueError: If the stored array holds more than one element.
    """
    return float(np.load(path).item())


# Training and prediction timings saved for each classifier.
svm_train_time = _load_seconds("../results/svm_train_time.npy")
svm_test_time  = _load_seconds("../results/svm_test_time.npy")

rf_train_time = _load_seconds("../results/rf_train_time.npy")
rf_test_time  = _load_seconds("../results/rf_test_time.npy")

knn_train_time = _load_seconds("../results/knn_train_time.npy")
knn_test_time  = _load_seconds("../results/knn_test_time.npy")

logreg_train_time = _load_seconds("../results/logreg_train_time.npy")
logreg_test_time  = _load_seconds("../results/logreg_test_time.npy")

gb_train_time = _load_seconds("../results/gb_train_time.npy")
gb_test_time  = _load_seconds("../results/gb_test_time.npy")


In [None]:

# One (model, train seconds, test seconds) row per classifier.
timing_rows = [
    ("SVM", svm_train_time, svm_test_time),
    ("Random Forest", rf_train_time, rf_test_time),
    ("k-NN", knn_train_time, knn_test_time),
    ("Logistic Regression", logreg_train_time, logreg_test_time),
    ("Gradient Boosting", gb_train_time, gb_test_time),
]
time_results = pd.DataFrame(
    timing_rows,
    columns=["Model", "Train Time (s)", "Test Time (s)"],
)

# Bare name as the last expression so the notebook renders the table.
time_results


In [None]:
# Training time per model. The y-axis is logarithmic, as the axis label and
# title state.
fig, ax = plt.subplots(figsize=(9, 5))
ax.bar(time_results["Model"], time_results["Train Time (s)"])
ax.set_yscale("log")
ax.tick_params(axis="x", labelrotation=15)  # tilt model names so they do not overlap
ax.set_ylabel("Seconds (log scale)")
ax.set_title("Training Time Comparison (Log Scale)")
ax.grid(axis="y", alpha=0.3)
plt.show()



In [None]:

# Display name -> saved test-set predictions for every model being inspected.
models = {
    "SVM": svm_test_pred,
    "Random Forest": rf_test_pred,
    "k-NN": knn_test_pred,
    "Logistic Regression": logreg_test_pred,
    "Gradient Boosting": gb_test_pred
}

unique_classes = np.unique(y_test)

# Print a misclassification report for each model: totals, the first few
# wrong samples, the (true, predicted) confusion pairs and per-class error counts.
for model_name, preds in models.items():

    print("\n======================================")
    print("Model:", model_name)
    print("======================================")

    # Positions where the prediction disagrees with the reference label.
    wrong_idx = np.nonzero(preds != y_test)[0]
    n_total = len(y_test)
    n_wrong = len(wrong_idx)

    print("Total test samples:", n_total)
    print("Total misclassified:", n_wrong)
    print("Accuracy:", 1 - n_wrong/n_total)

    # Nothing further to report for a model with no errors.
    if n_wrong == 0:
        print("No misclassifications.")
        continue

    # Only the first ten errors are listed individually.
    first_errors = wrong_idx[:10]
    print("\nSome misclassified indices:", first_errors)

    print("\n--- Sample errors (True → Predicted) ---")
    for i in first_errors:
        print(f"Index {i} | True: {y_test[i]} | Pred: {preds[i]}")

    # Count how often each (true label, predicted label) pair occurs among the errors.
    error_pairs = list(zip(y_test[wrong_idx], preds[wrong_idx]))
    error_summary = Counter(error_pairs)

    print("\n--- Error pattern summary (True → Predicted) ---")
    for k, v in error_summary.items():
        print(f"{k[0]} → {k[1]} : {v} samples")

    # For each class: how many of its samples received some other label.
    print("\n--- Class-wise error count ---")
    for cls in unique_classes:
        in_class = y_test == cls
        total = np.sum(in_class)
        wrong = np.sum(in_class & (preds != cls))
        print(f"Class {cls} : {wrong}/{total} misclassified")

