Installing required modules

In [None]:
import sys
import subprocess

def install(pkg):
    """Install ``pkg`` with pip, using the interpreter that runs this notebook.

    Runs ``<sys.executable> -m pip install <pkg>`` through
    ``subprocess.check_call``, which raises ``subprocess.CalledProcessError``
    when the command exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", pkg]
    subprocess.check_call(pip_command)

# pip distribution names of the third-party packages this notebook imports
# (pandas, numpy, matplotlib.pyplot and sklearn).
packages = ["pandas", "numpy", "matplotlib", "scikit-learn"]

# Distributions whose importable module name differs from the pip name.
# Anything not listed falls back to the text before the first "-".
import_names = {"scikit-learn": "sklearn"}

for p in packages:
    module_name = import_names.get(p, p.split("-")[0])
    try:
        # Probe by importing; only a failed import triggers an installation.
        __import__(module_name)
    except ImportError:
        print("Installing package:", p)
        install(p)

Importing modules

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

from sklearn.metrics import accuracy_score, f1_score, roc_curve, auc
from sklearn.preprocessing import label_binarize


In [None]:
# ===================================================
# Load saved results from Part-A (classical models)
# ===================================================
# Files are read in the same order as the variables are listed: the test
# labels first, then one group of five files per quantity, always ordered
# SVM, Random Forest, k-NN, Logistic Regression, Gradient Boosting.

y_test = np.load("../results/y_test.npy")

# *_test_pred.npy files
svm_pred, rf_pred, knn_pred, lr_pred, gb_pred = [
    np.load(path)
    for path in (
        "../results/svm_test_pred.npy",
        "../results/rf_test_pred.npy",
        "../results/knn_test_pred.npy",
        "../results/logreg_test_pred.npy",
        "../results/gb_test_pred.npy",
    )
]

# *_train_time.npy files
svm_train_time, rf_train_time, knn_train_time, lr_train_time, gb_train_time = [
    np.load(path)
    for path in (
        "../results/svm_train_time.npy",
        "../results/rf_train_time.npy",
        "../results/knn_train_time.npy",
        "../results/logreg_train_time.npy",
        "../results/gb_train_time.npy",
    )
]

# *_test_time.npy files
svm_test_time, rf_test_time, knn_test_time, lr_test_time, gb_test_time = [
    np.load(path)
    for path in (
        "../results/svm_test_time.npy",
        "../results/rf_test_time.npy",
        "../results/knn_test_time.npy",
        "../results/logreg_test_time.npy",
        "../results/gb_test_time.npy",
    )
]

# *_accuracy.npy files
svm_acc, rf_acc, knn_acc, lr_acc, gb_acc = [
    np.load(path)
    for path in (
        "../results/svm_accuracy.npy",
        "../results/rf_accuracy.npy",
        "../results/knn_accuracy.npy",
        "../results/logreg_accuracy.npy",
        "../results/gb_accuracy.npy",
    )
]

# *_f1.npy files
svm_f1, rf_f1, knn_f1, lr_f1, gb_f1 = [
    np.load(path)
    for path in (
        "../results/svm_f1.npy",
        "../results/rf_f1.npy",
        "../results/knn_f1.npy",
        "../results/logreg_f1.npy",
        "../results/gb_f1.npy",
    )
]

print("All results loaded successfully.")

In [None]:
models = ["SVM", "Random Forest", "k-NN", "Logistic Regression", "Gradient Boosting"]

# Summary table: one row per model, one column per loaded quantity.
# Every value list below follows the order of `models`.
summary_columns = {}
summary_columns["Model"] = models
summary_columns["Accuracy"] = [svm_acc, rf_acc, knn_acc, lr_acc, gb_acc]
summary_columns["Macro F1-score"] = [svm_f1, rf_f1, knn_f1, lr_f1, gb_f1]
summary_columns["Training Time (s)"] = [
    svm_train_time, rf_train_time, knn_train_time, lr_train_time, gb_train_time
]
summary_columns["Testing Time (s)"] = [
    svm_test_time, rf_test_time, knn_test_time, lr_test_time, gb_test_time
]

df = pd.DataFrame(summary_columns)

df


In [None]:
# Make sure the folder that receives the saved figures exists.
os.makedirs("../results/performance_plots", exist_ok=True)

# Bar chart of the loaded accuracy values, one bar per entry of `models`.
accuracy_fig, accuracy_ax = plt.subplots(figsize=(8, 5))
accuracy_ax.bar(models, [svm_acc, rf_acc, knn_acc, lr_acc, gb_acc])
accuracy_ax.set_ylabel("Accuracy")
accuracy_ax.set_title("Model Accuracy Comparison")
plt.xticks(rotation=30)  # tilt the model names so they do not overlap
accuracy_fig.tight_layout()
accuracy_fig.savefig("../results/performance_plots/accuracy_comparison.png", dpi=300)
plt.show()


In [None]:
# ===================================================
# Inference time per video
# ===================================================
# Each model's loaded test time is divided by the number of entries in
# y_test to obtain a per-sample figure.

n_test = len(y_test)

total_test_times = {
    "SVM": svm_test_time,
    "Random Forest": rf_test_time,
    "k-NN": knn_test_time,
    "Logistic Regression": lr_test_time,
    "Gradient Boosting": gb_test_time,
}

inf_per_video = {
    model_name: total_time / n_test
    for model_name, total_time in total_test_times.items()
}

# Two-column table built from the (model, time) pairs.
inf_rows = list(inf_per_video.items())
inf_df = pd.DataFrame(inf_rows, columns=["Model", "Inference Time per Video (s)"])

inf_df



In [None]:
# Bar chart of the per-video inference times held in `inf_df`.
inference_fig, inference_ax = plt.subplots(figsize=(8, 5))
inference_ax.bar(inf_df["Model"], inf_df["Inference Time per Video (s)"])
inference_ax.set_yscale("log")  # logarithmic y-axis
plt.xticks(rotation=30)
inference_ax.set_ylabel("Seconds (log scale)")
inference_ax.set_title("Inference Time per Video")
inference_fig.tight_layout()
inference_fig.savefig("../results/performance_plots/inference_time_per_video.png", dpi=300)
plt.show()

In [None]:
# Serialized model file for each classifier.
model_files = {
    "SVM": "../results/svm_model.joblib",
    "Random Forest": "../results/rf_model.joblib",
    "k-NN": "../results/knn_model.joblib",
    "Logistic Regression": "../results/logreg_model.joblib",
    "Gradient Boosting": "../results/gb_model.joblib"
}

# One [model, size] row per file. A file that cannot be stat'ed (missing or
# inaccessible) is recorded as NaN rather than None, so the size column stays
# numeric even when no model file is present and can always be plotted.
sizes = []

for model, path in model_files.items():
    try:
        size_mb = os.path.getsize(path) / (1024 * 1024)  # bytes -> MB (2**20 bytes)
    except OSError:
        size_mb = float("nan")
    sizes.append([model, size_mb])

size_df = pd.DataFrame(sizes, columns=["Model", "Model Size (MB)"])
size_df


In [None]:
# Bar chart of the on-disk model sizes collected in `size_df`.
size_fig, size_ax = plt.subplots(figsize=(8, 5))
size_ax.bar(size_df["Model"], size_df["Model Size (MB)"])
plt.xticks(rotation=30)
size_ax.set_ylabel("Size (MB)")
size_ax.set_title("Model Size Comparison")
size_fig.tight_layout()
size_fig.savefig("../results/performance_plots/model_size_comparison.png", dpi=300)
plt.show()


In [None]:
# Qualitative description of what each model keeps as its "parameters".
# These are hand-written labels; nothing here is read from the saved models.
param_counts = [
    ["Logistic Regression", "Linear weights (PCA-reduced)"],
    ["SVM", "Support vectors based"],
    ["Random Forest", "All tree nodes"],
    ["Gradient Boosting", "Sequential tree ensemble"],
    ["k-NN", "Stores full training set"]
]

param_df = pd.DataFrame(param_counts, columns=["Model", "Parameter Representation"])
param_df


In [None]:
# Bar chart of the loaded macro F1 values, one bar per entry of `models`.
f1_fig, f1_ax = plt.subplots(figsize=(8, 5))
f1_ax.bar(models, [svm_f1, rf_f1, knn_f1, lr_f1, gb_f1])
f1_ax.set_ylabel("Macro F1-score")
f1_ax.set_title("Model F1-score Comparison")
plt.xticks(rotation=30)
f1_fig.tight_layout()
f1_fig.savefig("../results/performance_plots/f1_comparison.png", dpi=300)
plt.show()


In [None]:
# Grouped bar chart: for every model, a "Train" bar and a "Test" bar side by side.
x = np.arange(len(models))
width = 0.35

train_times = [svm_train_time, rf_train_time, knn_train_time, lr_train_time, gb_train_time]
test_times = [svm_test_time, rf_test_time, knn_test_time, lr_test_time, gb_test_time]

time_fig, time_ax = plt.subplots(figsize=(9, 5))
# Shift each series half a bar width left/right of the tick position.
time_ax.bar(x - width/2, train_times, width, label="Train")
time_ax.bar(x + width/2, test_times, width, label="Test")

# Logarithmic y-axis.
time_ax.set_yscale("log")

time_ax.set_xticks(x)
time_ax.set_xticklabels(models, rotation=30)
time_ax.set_ylabel("Time (seconds) [log scale]")
time_ax.set_title("Training vs Testing Time Comparison (Log Scale)")
time_ax.legend()
time_fig.tight_layout()
time_fig.savefig("../results/performance_plots/time_comparison_log.png", dpi=300)
plt.show()



In [None]:
# One-vs-rest ROC curves for the Gradient Boosting predictions.
classes = np.unique(y_test)

# Build the (n_samples, n_classes) 0/1 indicator matrices by direct comparison.
# label_binarize collapses a two-class problem into a single column, which
# made the per-class loop below index a column that does not exist.
y_test_bin = np.column_stack([np.asarray(y_test) == cls for cls in classes]).astype(int)
gb_bin = np.column_stack([np.asarray(gb_pred) == cls for cls in classes]).astype(int)

# NOTE(review): the "scores" passed to roc_curve are binarized predictions, so
# each curve has a single operating point between (0, 0) and (1, 1). If class
# probabilities were saved in Part-A, using them would give a full curve.
plt.figure(figsize=(7,6))

for i, cls in enumerate(classes):
    fpr, tpr, _ = roc_curve(y_test_bin[:, i], gb_bin[:, i])
    roc_auc = auc(fpr, tpr)
    # Label with the actual class value, which need not equal its index.
    plt.plot(fpr, tpr, label=f"Class {cls} (AUC = {roc_auc:.2f})")

plt.plot([0,1], [0,1], "k--")  # chance-level diagonal
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curves – Gradient Boosting")
plt.legend()
plt.tight_layout()
plt.savefig("../results/performance_plots/gb_roc.png", dpi=300)
plt.show()
