## Gaussian Mixture Models on CSI data - Proof of Concept

In [1]:
import sys

sys.path.insert(0, "..")

import scipy.io
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import PCA

from HAR.transformers import CSIMinMaxScaler


def load_dataset(infile):
    """Load a CSI dataset and its class labels from a MATLAB ``.mat`` file.

    The file must provide the variables ``csi``, ``nsamples``, ``dim`` and
    ``classnames``.

    Parameters
    ----------
    infile : str or file-like
        Passed unchanged to ``scipy.io.loadmat``.

    Returns
    -------
    X : ndarray
        Transpose of the ``csi`` variable (presumably one row per sample --
        TODO confirm against the dataset layout).
    y : ndarray
        Class index per sample: index ``i`` is repeated ``nsamples[i]`` times,
        in class order. This assumes the samples in ``csi`` are stored grouped
        by class in that same order -- verify against the dataset.
    nsamples : ndarray
        Flattened ``nsamples`` variable, exactly as stored in the file.
    classnames : list of str
        Entries of ``classnames`` with surrounding whitespace stripped and
        title-cased.
    dim : ndarray
        Flattened ``dim`` variable.
    """
    mat = scipy.io.loadmat(infile)
    X = mat["csi"].T
    nsamples = mat["nsamples"].flatten()
    dim = mat["dim"].flatten()
    classnames = [name.strip().title() for name in mat["classnames"]]

    # MATLAB stores numbers as doubles unless they are explicitly cast, and a
    # Python list cannot be repeated by a float, so coerce each count to a
    # plain int before building the label run for that class.
    y = []
    for label in range(len(classnames)):
        y.extend([label] * int(nsamples[label]))
    y = np.array(y)
    return X, y, nsamples, classnames, dim

In [2]:
# Load the CSI matrix, the per-sample labels, the class names and the
# per-sample shape stored in the dataset file.
X_flat, y, _, classnames, dim = load_dataset("../dataset/rCSI-5.mat")

# Min-Max scaling: the scaler is fed samples reshaped to `dim`
# (presumably the layout CSIMinMaxScaler expects -- confirm in HAR.transformers).
X_frames = X_flat.reshape(X_flat.shape[0], *dim)
X_scaled = CSIMinMaxScaler().fit_transform(X_frames)

# Flatten each scaled sample back into a single feature vector for the
# estimators used further down.
n_features = dim[0] * dim[1]
X = X_scaled.reshape(X_scaled.shape[0], n_features)

In [3]:
# A fractional n_components asks PCA to keep as many components as are needed
# to explain that share of the variance (here 95%).
pca_step = PCA(n_components=0.95, random_state=42)

# One mixture component per known class; the seed is fixed so the fit is
# reproducible across runs.
gmm_step = GaussianMixture(n_components=len(classnames), random_state=42)

pipe = Pipeline([("pca", pca_step), ("gmm", gmm_step)])

In [4]:
# Fit the whole pipeline on the scaled data and get one mixture-component
# index per sample.
y_pred = pipe.fit_predict(X)


def most_frequent(a):
    """Return the value that occurs most often in the non-negative integer array `a`.

    Ties resolve to the smallest value, because argmax returns the first maximum.
    """
    return np.bincount(a).argmax()


# For every true class, report the mixture component its samples were
# assigned to most often.
for class_idx in range(len(classnames)):
    dominant_component = most_frequent(y_pred[y == class_idx])
    print(f"Class {class_idx} labelled as {dominant_component}")

Class 0 labelled as 2
Class 1 labelled as 1
Class 2 labelled as 1


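*Observation*: Unreliable classification — classes 1 and 2 are both most frequently assigned to mixture component 1, so the fitted GMM components do not map one-to-one onto the three classes.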