# Activity Monitoring (Machine Learning Project)
### &nbsp; &nbsp; BSEF21M001 - Yeshal Khan
### &nbsp; &nbsp; BSEF21M008 - Zohaib Shahid
### &nbsp; &nbsp; BSEF21M016 - Faiqa Nasir

# Importing the necessary Libraries for the activity monitoring

In [5]:
import numpy as np # type: ignore
from sklearn.preprocessing import StandardScaler,Normalizer
from sklearn.impute import SimpleImputer
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, recall_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

# Path to the data files

In [6]:
data_path = "D:\\5th Semester\\ML\\ML Project\\bbh\\"

# Loading the training and testing data and label files


In [7]:
def load_dataset(data_path):
    # Load training data and training labels from files
    train_accel_ms = np.load(data_path + "training/trainMSAccelerometer.npy")
    train_gyro_ms = np.load(data_path + "training/trainMSGyroscope.npy")
    train_accel = np.load(data_path + "training/trainAccelerometer.npy")
    train_gravity = np.load(data_path + "training/trainGravity.npy")
    train_accel_jin = np.load(data_path + "training/trainJinsAccelerometer.npy")
    train_gyro_jin = np.load(data_path + "training/trainJinsGyroscope.npy")
    train_lin_accel = np.load(data_path + "training/trainLinearAcceleration.npy")
    train_magnetometer = np.load(data_path + "training/trainMagnetometer.npy")
    train_gyro = np.load(data_path + "training/trainGyroscope.npy")
    train_labels = np.load(data_path + "training/trainlabels.npy")

    # Load testing data and testing labels from files
    test_accel_ms = np.load(data_path + "testing/testMSAccelerometer.npy")
    test_gyro_ms = np.load(data_path + "testing/testMSGyroscope.npy")
    test_accel = np.load(data_path + "testing/testAccelerometer.npy")
    test_gravity = np.load(data_path + "testing/testGravity.npy")
    test_accel_jin = np.load(data_path + "testing/testJinsAccelerometer.npy")
    test_gyro_jin = np.load(data_path + "testing/testJinsGyroscope.npy")
    test_lin_accel = np.load(data_path + "testing/testLinearAcceleration.npy")
    test_magnetometer = np.load(data_path + "testing/testMagnetometer.npy")
    test_gyro = np.load(data_path + "testing/testGyroscope.npy")
    test_labels = np.load(data_path + "testing/testlabels.npy")

    training_data = [train_accel_ms, train_gyro_ms, train_accel, train_gravity, train_accel_jin, train_gyro_jin, train_lin_accel, train_magnetometer, train_gyro]
    testing_data = [test_accel_ms, test_gyro_ms, test_accel, test_gravity, test_accel_jin, test_gyro_jin, test_lin_accel, test_magnetometer, test_gyro]

    return training_data, train_labels, testing_data, test_labels

In [8]:
training_data, train_labels, testing_data, test_labels = load_dataset(data_path)

# Applying Pre-Processing Techniques on each data set

In [9]:
# Pre-process the training and testing data using the StandardScaler
def pre_process_dataset_scalar(data_sets):
    pre_processed_data = []
    for data_set in data_sets:
        scalar = StandardScaler()
        impute = SimpleImputer()
        N,T,S=data_set.shape
        reshaped_data = np.reshape(data_set, (N, T*S))
        imputed_data = impute.fit_transform(reshaped_data)
        pre_processed_data.append(scalar.fit_transform(imputed_data).reshape((N, T, S)))
    return pre_processed_data

# Pre-process the training and testing data using the Normalizer
def pre_process_dataset_normalizer(data_sets):
    pre_processed_data = []
    for data_set in data_sets:
        normalizer = Normalizer()
        impute = SimpleImputer()
        N,T,S=data_set.shape
        reshaped_data = np.reshape(data_set, (N, T*S))
        imputed_data = impute.fit_transform(reshaped_data)
        pre_processed_data.append(normalizer.fit_transform(imputed_data).reshape((N, T, S)))
    return pre_processed_data

# Feature Extraction for each dataset

In [10]:
def extract_features(data):
    features = []
    for data_set in data:
        features.append(np.mean(data_set, axis=1))
        features.append(np.max(data_set, axis=1))
        features.append(np.min(data_set, axis=1))
        features.append(np.std(data_set, axis=1))
        features.append(np.var(data_set, axis=1))
        features.append(np.median(data_set, axis=1))
        features.append(np.percentile(data_set, 25, axis=1))
        features.append(np.percentile(data_set, 75, axis=1))
    return np.concatenate(features, axis=1)

# Dimensions of Testing and Training data after the pre-processing and feature extraction

In [11]:
X_train_scalar = extract_features(pre_process_dataset_scalar(training_data))
X_test_scalar = extract_features(pre_process_dataset_scalar(testing_data))

X_train_normalizer = extract_features(pre_process_dataset_normalizer(training_data))
X_test_normalizer = extract_features(pre_process_dataset_normalizer(testing_data))

print("Training data shape after pre-processing and feature extraction: ", X_train_scalar.shape)
print("Testing data shape after pre-processing and feature extraction: ", X_test_scalar.shape)

Training data shape after pre-processing and feature extraction:  (2284, 216)
Testing data shape after pre-processing and feature extraction:  (2288, 216)


# Training and Testing of the Model

# Random Forest Classifier

In [12]:
random_forest = RandomForestClassifier()
random_forest.fit(X_train_normalizer, train_labels)
predictions = random_forest.predict(X_test_normalizer)
accuracy = accuracy_score(test_labels, predictions)
f1 = f1_score(test_labels, predictions, average='macro')
recall = recall_score(test_labels, predictions, average='macro')
rf_confusion = confusion_matrix(test_labels, predictions)

print("Random Forest Classifier Evaluation Metrics")
print("===========================================")
print(f"Accuracy:  {accuracy * 100:.2f} %")
print(f"F1 Score:  {f1:.2f}")
print(f"Recall Score:  {recall:.2f}")
print("Confusion Matrix:")
print(rf_confusion)

Random Forest Classifier Evaluation Metrics
Accuracy:  68.23 %
F1 Score:  0.68
Recall Score:  0.68
Confusion Matrix:
[[39  0  0 ...  0  0  0]
 [ 0 51  0 ...  0  0  0]
 [ 0 10 20 ...  0  1  1]
 ...
 [ 0  0  3 ... 17  0  0]
 [ 0  0  0 ...  0 39  0]
 [ 0  0  0 ...  0  0 33]]


# Support Vector Machine

In [13]:
SVM = SVC()
SVM.fit(X_train_normalizer, train_labels)
predicted_labels = SVM.predict(X_test_normalizer)
accuracy = accuracy_score(test_labels, predicted_labels)
recall = recall_score(test_labels, predicted_labels, average='macro')
averageF1 = f1_score(test_labels, predicted_labels, average='macro')
SVM_confusion_matrix = confusion_matrix(test_labels, predicted_labels)

print("SVM Classifier Evaluation Metrics")
print("===========================================")
print(f"Accuracy:  {accuracy * 100:.2f} %")
print(f"F1 Score:  {f1:.2f}")
print(f"Recall Score:  {recall:.2f}")
print("Confusion Matrix:")
print(SVM_confusion_matrix)

SVM Classifier Evaluation Metrics
Accuracy:  52.23 %
F1 Score:  0.68
Recall Score:  0.52
Confusion Matrix:
[[38  0  0 ...  0  0  0]
 [ 0 46  4 ...  0  0  0]
 [ 0 10 13 ...  2  0  0]
 ...
 [ 0  1  4 ... 12  0  0]
 [ 0  0  0 ...  0 34  0]
 [ 0  0  0 ...  0  0 15]]


# Logistic Regression

In [16]:
Logistic_Regression = LogisticRegression(max_iter=3000)
Logistic_Regression.fit(X_train_scalar,train_labels)
predicted_labels = Logistic_Regression.predict(X_test_scalar)
accuracy = accuracy_score(test_labels,predicted_labels)
recall = recall_score(test_labels,predicted_labels,average='macro')
averageF1 = f1_score(test_labels,predicted_labels,average='macro')
accuracy = accuracy_score(test_labels, predicted_labels)
recall = recall_score(test_labels, predicted_labels, average='macro')
averageF1 = f1_score(test_labels, predicted_labels, average='macro')
logistic_Regression_confusion_matrix = confusion_matrix(test_labels, predicted_labels)

print("Logistic Regression Classifier Evaluation Metrics")
print("=================================================")
print(f"Accuracy:  {accuracy * 100:.2f} %")
print(f"F1 Score:  {averageF1:.2f}")
print(f"Recall Score:  {recall:.2f}")
print("Confusion Matrix:")
print(logistic_Regression_confusion_matrix)

Logistic Regression Classifier Evaluation Metrics
Accuracy:  57.30 %
F1 Score:  0.57
Recall Score:  0.58
Confusion Matrix:
[[38  0  0 ...  0  0  0]
 [ 0 44  4 ...  0  0  0]
 [ 0  2 21 ...  1  0  0]
 ...
 [ 0  1  1 ...  8  0  0]
 [ 0  0  0 ...  0 22  0]
 [ 0  0  0 ...  1  0 25]]


# Naive Bayes Classifier

In [15]:
Naive_Bayes = GaussianNB()
Naive_Bayes.fit(X_train_scalar, train_labels)
predicted_labels = Naive_Bayes.predict(X_test_scalar)
accuracy = accuracy_score(test_labels, predicted_labels)
recall = recall_score(test_labels, predicted_labels, average='macro')
averageF1 = f1_score(test_labels, predicted_labels, average='macro')
naive_bayes_confusion_matrix = confusion_matrix(test_labels, predicted_labels)
print("Naive Bayes Classifier Evaluation Metrics")
print("===========================================")
print(f"Accuracy:  {accuracy * 100:.2f} %")
print(f"F1 Score:  {averageF1:.2f}")
print(f"Recall Score:  {recall:.2f}")
print("Confusion Matrix:")
print(naive_bayes_confusion_matrix)


Naive Bayes Classifier Evaluation Metrics
Accuracy:  43.27 %
F1 Score:  0.41
Recall Score:  0.44
Confusion Matrix:
[[39  0  0 ...  0  0  0]
 [ 0 43  1 ...  0  1  0]
 [ 0  8 12 ...  0  3  0]
 ...
 [ 0  0  1 ...  1  0  1]
 [ 0  0  1 ...  0 31  0]
 [ 0  0  0 ...  0  0 21]]
