## Analysing performance of a simple SVM classifier

In [1]:
import sys

sys.path.insert(0, "..")

import scipy.io
import numpy as np
from sklearn import svm
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

from HAR.transformers import CSIMinMaxScaler


def load_dataset(infile):
    """Load a CSI dataset stored in a MATLAB ``.mat`` file.

    The file must contain the variables ``csi``, ``nsamples``, ``dim`` and
    ``classnames``.

    Parameters
    ----------
    infile : str or path-like
        Location of the ``.mat`` file; forwarded to ``scipy.io.loadmat``.

    Returns
    -------
    X : numpy.ndarray
        Transpose of the stored ``csi`` matrix.
    y : numpy.ndarray
        Integer labels: class index ``i`` repeated ``nsamples[i]`` times, in
        class order.
    nsamples : numpy.ndarray
        The ``nsamples`` variable flattened to 1-D, with its stored dtype.
    classnames : list of str
        Entries of ``classnames`` with surrounding whitespace stripped and
        title-cased.
    dim : numpy.ndarray
        The ``dim`` variable flattened to 1-D.
    """
    mat = scipy.io.loadmat(infile)
    X = mat["csi"].T
    nsamples = mat["nsamples"].flatten()
    dim = mat["dim"].flatten()
    classnames = [name.strip().title() for name in mat["classnames"]]

    # MATLAB saves numbers as doubles unless told otherwise, and a float count
    # cannot be used to repeat a label. Cast a copy for label construction and
    # return `nsamples` itself exactly as stored.
    n_classes = len(classnames)
    counts = nsamples[:n_classes].astype(np.intp)
    y = np.repeat(np.arange(n_classes), counts)
    return X, y, nsamples, classnames, dim

### [1] Train and test SVM on data captured on some subset of days

In [2]:
# Load the rCSI-3 recordings; the per-class sample counts are not needed here.
X, y, _, classnames, dim = load_dataset("../dataset/rCSI-3.mat")

# Give every sample its `dim` shape for scaling, then flatten it back into a
# single feature vector per sample for the SVM.
# NOTE(review): the scaler is fitted before the train/test split; if
# CSIMinMaxScaler learns dataset-wide statistics this leaks test information
# into training — confirm against HAR.transformers.
X = CSIMinMaxScaler().fit_transform(X.reshape(len(X), *dim))
X = X.reshape(len(X), dim[0] * dim[1])

# Hold out 20% of the samples, keeping the class proportions in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)

# Fit an RBF-kernel SVM on the training part.
clf = svm.SVC(kernel="rbf", decision_function_shape="ovo").fit(X_train, y_train)

# Score the held-out part and print the accuracy, confusion matrix and report.
y_pred = clf.predict(X_test)

print(
    f"\n> Test Accuracy: {accuracy_score(y_test, y_pred)*100:.4f}%",
    "\n> Confusion Matrix: ",
    confusion_matrix(y_test, y_pred),
    "\n> Classification Report :",
    classification_report(y_test, y_pred, target_names=classnames),
    sep="\n",
)


> Test Accuracy: 96.6667%

> Confusion Matrix: 
[[100   0   0]
 [  3  93   4]
 [  1   2  97]]

> Classification Report :
              precision    recall  f1-score   support

       Empty       0.96      1.00      0.98       100
        Walk       0.98      0.93      0.95       100
        Jump       0.96      0.97      0.97       100

    accuracy                           0.97       300
   macro avg       0.97      0.97      0.97       300
weighted avg       0.97      0.97      0.97       300



### [2] Test SVM on data captured on a different day, not part of the previous train-test set

In [3]:
import warnings

from sklearn.exceptions import UndefinedMetricWarning

# Silence only the metric warning expected in this evaluation (precision is
# undefined for a class that is never predicted) instead of hiding every
# warning raised in the kernel from here on.
warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

# Load the rCSI-4 recordings and preprocess them with the same steps used for
# the training data: reshape each sample to `dim`, scale, flatten.
# NOTE(review): a fresh CSIMinMaxScaler is fitted on this data; if the scaler
# learns dataset-wide statistics, the instance fitted on the training data
# should probably be reused instead — confirm against HAR.transformers.
X, y, _, classnames, dim = load_dataset("../dataset/rCSI-4.mat")
X = X.reshape(X.shape[0], *dim)
X = CSIMinMaxScaler().fit_transform(X)
X = X.reshape(X.shape[0], dim[0] * dim[1])

# Evaluate the already-fitted classifier `clf`; it is not refitted here.
y_pred = clf.predict(X)

print(f"\n> Test Accuracy: {accuracy_score(y, y_pred)*100:.4f}%")
print("\n> Confusion Matrix: ")
print(confusion_matrix(y, y_pred))
print("\n> Classification Report :")
print(classification_report(y, y_pred, target_names=classnames))


> Test Accuracy: 33.3333%

> Confusion Matrix: 
[[  0 120   0]
 [  0 120   0]
 [  0 120   0]]

> Classification Report :
              precision    recall  f1-score   support

       Empty       0.00      0.00      0.00       120
        Walk       0.33      1.00      0.50       120
        Jump       0.00      0.00      0.00       120

    accuracy                           0.33       360
   macro avg       0.11      0.33      0.17       360
weighted avg       0.11      0.33      0.17       360



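**Observation**: Prediction accuracy drops sharply, from 96.67% on the held-out split to 33.33% on the data from a different day. That is chance level for three balanced classes, and every sample is predicted as Walk. This agrees with the conclusion reached at the end of the t-SNE analysis.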