In [22]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score


Data

In [23]:
extracted_dir_path = 'homework'

Extraction of features

In [24]:
def extract_features(df):
    features = {}
    for axis in ['X', 'Y', 'Z']:
        data = df[f'accelerometer_{axis}']
        features[f'mean_{axis}'] = data.mean()
        features[f'std_{axis}'] = data.std()
        features[f'max_{axis}'] = data.max()
        features[f'min_{axis}'] = data.min()
        features[f'range_{axis}'] = data.max() - data.min()
        features[f'median_{axis}'] = data.median()
        features[f'var_{axis}'] = data.var()
        features[f'energy_{axis}'] = (data ** 2).sum()
        features[f'iqr_{axis}'] = data.quantile(0.75) - data.quantile(0.25)
    return features

Loading of Data

In [25]:
activity_dirs = ['idle', 'running', 'stairs', 'walking']
activity_data = {}

In [26]:
for activity in activity_dirs:
    activity_path = os.path.join(extracted_dir_path, 'data', activity)
    if not os.path.exists(activity_path):
        print(f"Directory {activity_path} does not exist.")
        continue
    activity_files = os.listdir(activity_path)
    activity_data[activity] = []
    for file in activity_files:
        if file.endswith('.csv'):
            file_path = os.path.join(activity_path, file)
            df = pd.read_csv(file_path)
            activity_data[activity].append(df)

Dataset

In [27]:
data = []
labels = []

for activity, dfs in activity_data.items():
    for df in dfs:
        features = extract_features(df)
        data.append(features)
        labels.append(activity)

data = pd.DataFrame(data)
labels = pd.Series(labels)


Dividing for training / testing sets

In [28]:
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.3, random_state=42)

Standardize the features

In [29]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

SVM 

In [30]:
svm_model = SVC(kernel='linear', random_state=42)
svm_model.fit(X_train, y_train)
svm_predictions = svm_model.predict(X_test)
print("SVM Classification Report")
print(classification_report(y_test, svm_predictions))
print("SVM Accuracy:", accuracy_score(y_test, svm_predictions))


SVM Classification Report
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00       317
     running       1.00      1.00      1.00      1035
      stairs       0.90      0.76      0.83        50
     walking       0.98      0.99      0.99       537

    accuracy                           0.99      1939
   macro avg       0.97      0.94      0.95      1939
weighted avg       0.99      0.99      0.99      1939

SVM Accuracy: 0.9917483238782878


Random Forest

In [31]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)
print("Random Forest Classification Report")
print(classification_report(y_test, rf_predictions))
print("Random Forest Accuracy:", accuracy_score(y_test, rf_predictions))

Random Forest Classification Report
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00       317
     running       1.00      1.00      1.00      1035
      stairs       1.00      0.92      0.96        50
     walking       0.99      1.00      1.00       537

    accuracy                           1.00      1939
   macro avg       1.00      0.98      0.99      1939
weighted avg       1.00      1.00      1.00      1939

Random Forest Accuracy: 0.9979370809695719
