In [79]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
import os
from sklearn.preprocessing import MinMaxScaler
from scipy.stats import entropy, skew, kurtosis
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, mutual_info_classif, chi2, f_classif
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report

### Import data

In [52]:
root_folder_path = 'data'

data_frames = []

for folder in os.listdir(root_folder_path):  # look for every folder in the root folder
    folder_path = os.path.join(root_folder_path, folder)  # path for every folder: idle, running, etc.

    for file_name in os.listdir(folder_path):  # look for every file in the folder
        file_path = os.path.join(folder_path, file_name)  # path for every file in the folder

        data = pd.read_csv(file_path, skiprows=0)  # read data from the file, skipping titles

        data['target'] = folder  # add column with target: idle, running, stairs, or walking
        data_frames.append(data)  # append every data to data_frames list

all_data = pd.concat(data_frames, axis=0, ignore_index=True)  # add all data into the main DataFrame
all_data.columns = ['X', 'Y', 'Z', 'target']

In [53]:
all_data.head()

Unnamed: 0,X,Y,Z,target
0,1.000776,4.616021,8.576031,idle
1,0.718261,4.209007,8.446744,idle
2,-0.909797,-0.282516,9.203311,idle
3,5.09965,0.148441,8.418014,idle
4,1.762132,-0.162806,9.251195,idle


In [54]:
all_data.shape

(193860, 4)

### Data Normalization

In [55]:
# data normalization
scaler = MinMaxScaler()
normalized_data = all_data.copy()
normalized_data[['X', 'Y', 'Z']] = scaler.fit_transform(all_data[['X', 'Y', 'Z']])
normalized_data.head()

Unnamed: 0,X,Y,Z,target
0,0.512769,0.558895,0.609421,idle
1,0.509164,0.553702,0.607771,idle
2,0.488392,0.496395,0.617424,idle
3,0.565066,0.501894,0.607405,idle
4,0.522483,0.497923,0.618035,idle


## Without new features

### SVC Linear

In [87]:
X = normalized_data[['X', 'Y', 'Z']]
y = normalized_data['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=42)

In [57]:
svc_classifier = SVC()
svc_classifier.fit(X_train, y_train)
svc_prediction = svc_classifier.predict(X_test)

In [58]:
from sklearn.metrics import confusion_matrix
confusion_matrix = confusion_matrix(y_test, svc_prediction)
print(confusion_matrix)
print(classification_report(y_test, svc_prediction, zero_division=1))

[[ 9185    85     0    36]
 [  362 27609     0  2638]
 [   13   372     4  1148]
 [   54  1531     0 15121]]
              precision    recall  f1-score   support

        idle       0.96      0.99      0.97      9306
     running       0.93      0.90      0.92     30609
      stairs       1.00      0.00      0.01      1537
     walking       0.80      0.91      0.85     16706

    accuracy                           0.89     58158
   macro avg       0.92      0.70      0.69     58158
weighted avg       0.90      0.89      0.88     58158



### Decision Tree

In [88]:
tree_classifier = RandomForestClassifier(n_estimators=10, max_depth=10, random_state=42)
tree_classifier.fit(X_train, y_train)
tree_prediction = tree_classifier.predict(X_test)

In [89]:
from sklearn.metrics import confusion_matrix
confusion_matrix = confusion_matrix(y_test, tree_prediction)
print(confusion_matrix)
print(classification_report(y_test, tree_prediction, zero_division=1))

[[ 9242    56     0     8]
 [   67 29034     0  1508]
 [    4   428   150   955]
 [   19   893     0 15794]]
              precision    recall  f1-score   support

        idle       0.99      0.99      0.99      9306
     running       0.95      0.95      0.95     30609
      stairs       1.00      0.10      0.18      1537
     walking       0.86      0.95      0.90     16706

    accuracy                           0.93     58158
   macro avg       0.95      0.75      0.76     58158
weighted avg       0.94      0.93      0.92     58158



## New features

In [61]:
mean_x = []
mean_y = []
mean_z = []
target = []
variance_x = []
variance_y = []
variance_z = []
std_x = []
std_y = []
std_z = []
root_mean_square_x = []
root_mean_square_y = []
root_mean_square_z = []
min_x = []
min_y = []
min_z = []
max_x = []
max_y = []
max_z = []
median_x = []
median_y = []
median_z = []

fft_power_x = []
fft_power_y = []
fft_power_z = []
fft_energy_x = []
fft_energy_y = []
fft_energy_z = []
fft_magnitude_x = []
fft_magnitude_y = []
fft_magnitude_z = []
fft_area_x = []
fft_area_y = []
fft_area_z = []
fft_max_amplitude_x = []
fft_max_amplitude_y = []
fft_max_amplitude_z = []
fft_min_amplitude_x = []
fft_min_amplitude_y = []
fft_min_amplitude_z = []
fft_min_index_x = []
fft_min_index_y = []
fft_min_index_z = []
fft_max_index_x = []
fft_max_index_y = []
fft_max_index_z = []
fft_entropy_x = []
fft_entropy_y = []
fft_entropy_z = []
fft_skewness_x = []
fft_skewness_y = []
fft_skewness_z = []
fft_kurtosis_x = []
fft_kurtosis_y = []
fft_kurtosis_z = []
fft_interquartile_range_x = []
fft_interquartile_range_y = []
fft_interquartile_range_z = []
fft_mean_absolute_deviation_x = []
fft_mean_absolute_deviation_y = []
fft_mean_absolute_deviation_z = []

In [62]:
for column in normalized_data.columns:
    for i in range(0, len(normalized_data['X']), 30):

        segment = normalized_data[column].loc[i:i+30]

        if column == 'X':

            mean_x.append(segment.mean())
            min_x.append(segment.min())
            max_x.append(segment.min())
            median_x.append(np.median(segment))
            variance_x.append(np.var(segment))
            std_x.append(np.std(segment))
            root_mean_square_x.append(np.sqrt(np.mean(np.square(segment))))

            x_fft = np.fft.fft(segment, n=len(segment))

            fft_power_x.append((np.mean(np.abs(x_fft) ** 2) / (2 * np.pi)))
            fft_energy_x.append(np.sum(np.abs(x_fft) ** 2) / len(x_fft))

            magnitude_x = np.abs(x_fft)
            fft_magnitude_x.append(np.mean(magnitude_x))
            fft_area_x.append(np.trapz(magnitude_x))

            fft_max_amplitude_x.append(np.max(np.abs(x_fft)))
            fft_min_amplitude_x.append(np.min(np.abs(x_fft)))

            fft_max_index_x.append(np.argmax(np.abs(x_fft)))
            fft_min_index_x.append(np.argmin(np.abs(x_fft)))

            fft_entropy_x = entropy(np.abs(x_fft))
            fft_skewness_x = skew(np.abs(x_fft))
            fft_kurtosis_x = kurtosis(np.abs(x_fft))

            fft_interquartile_range_x = np.percentile(np.abs(x_fft), 75) - np.percentile(np.abs(x_fft), 25)
            fft_mean_absolute_deviation_x = np.mean(np.abs(np.abs(x_fft) - np.mean(np.abs(x_fft))))

        elif column == 'Y':

            mean_y.append(segment.mean())
            min_y.append(segment.min())
            max_y.append(segment.min())
            median_y.append(np.median(segment))
            variance_y.append(np.var(segment))
            std_y.append(np.std(segment))
            root_mean_square_y.append(np.sqrt(np.mean(np.square(segment))))

            y_fft = np.fft.fft(segment, n=len(segment))

            fft_power_y.append((np.mean(np.abs(y_fft) ** 2) / (2 * np.pi)))
            fft_energy_y.append(np.sum(np.abs(y_fft) ** 2) / len(y_fft))

            magnitude_y = np.abs(y_fft)
            fft_magnitude_y.append(np.mean(magnitude_y))
            fft_area_y.append(np.trapz(magnitude_y))

            fft_max_amplitude_y.append(np.max(np.abs(y_fft)))
            fft_min_amplitude_y.append(np.min(np.abs(y_fft)))

            fft_max_index_y.append(np.argmax(np.abs(y_fft)))
            fft_min_index_y.append(np.argmin(np.abs(y_fft)))

            fft_entropy_y = entropy(np.abs(y_fft))
            fft_skewness_y = skew(np.abs(y_fft))
            fft_kurtosis_y = kurtosis(np.abs(y_fft))

            fft_interquartile_range_y = np.percentile(np.abs(y_fft), 75) - np.percentile(np.abs(y_fft), 25)
            fft_mean_absolute_deviation_y = np.mean(np.abs(np.abs(y_fft) - np.mean(np.abs(y_fft))))

        elif column == 'Z':

            mean_z.append(segment.mean())
            min_z.append(segment.min())
            max_z.append(segment.min())
            median_z.append(np.median(segment))
            variance_z.append(np.var(segment))
            std_z.append(np.std(segment))
            root_mean_square_z.append(np.sqrt(np.mean(np.square(segment))))

            z_fft = np.fft.fft(segment, n=len(segment))

            fft_power_z.append((np.mean(np.abs(z_fft) ** 2) / (2 * np.pi)))
            fft_energy_z.append(np.sum(np.abs(z_fft) ** 2) / len(z_fft))

            magnitude_z = np.abs(z_fft)
            fft_magnitude_z.append(np.mean(magnitude_z))
            fft_area_z.append(np.trapz(magnitude_z))

            fft_max_amplitude_z.append(np.max(np.abs(z_fft)))
            fft_min_amplitude_z.append(np.min(np.abs(z_fft)))

            fft_max_index_z.append(np.argmax(np.abs(z_fft)))
            fft_min_index_z.append(np.argmin(np.abs(z_fft)))

            fft_entropy_z = entropy(np.abs(z_fft))
            fft_skewness_z = skew(np.abs(z_fft))
            fft_kurtosis_z = kurtosis(np.abs(z_fft))

            fft_interquartile_range_z = np.percentile(np.abs(z_fft), 75) - np.percentile(np.abs(z_fft), 25)
            fft_mean_absolute_deviation_z = np.mean(np.abs(np.abs(z_fft) - np.mean(np.abs(z_fft))))

        elif column == 'target':
            
            target.append(normalized_data[column].loc[i])

In [63]:
data = {
    'target': target,
    'mean_x': mean_x,
    'mean_y': mean_y,
    'mean_z': mean_z,
    'variance_x': variance_x,
    'variance_y': variance_y,
    'variance_z': variance_z,
    'std_x': std_x,
    'std_y': std_y,
    'std_z': std_z,
    'root_mean_square_x': root_mean_square_x,
    'root_mean_square_y': root_mean_square_y,
    'root_mean_square_z': root_mean_square_z,
    'min_x': min_x,
    'min_y': min_y,
    'min_z': min_z,
    'max_x': max_x,
    'max_y': max_y,
    'max_z': max_z,
    'median_x': median_x,
    'median_y': median_y,
    'median_z': median_z,
    'fft_power_x': fft_power_x,
    'fft_power_y': fft_power_y,
    'fft_power_z': fft_power_z,
    'fft_energy_x': fft_energy_x,
    'fft_energy_y': fft_energy_y,
    'fft_energy_z': fft_energy_z,
    'fft_magnitude_x': fft_magnitude_x,
    'fft_magnitude_y': fft_magnitude_y,
    'fft_magnitude_z': fft_magnitude_z,
    'fft_area_x': fft_area_x,
    'fft_area_y': fft_area_y,
    'fft_area_z': fft_area_z,
    'fft_max_amplitude_x': fft_max_amplitude_x,
    'fft_max_amplitude_y': fft_max_amplitude_y,
    'fft_max_amplitude_z': fft_max_amplitude_z,
    'fft_min_amplitude_x': fft_min_amplitude_x,
    'fft_min_amplitude_y': fft_min_amplitude_y,
    'fft_min_amplitude_z': fft_min_amplitude_z,
    'fft_min_index_x': fft_min_index_x,
    'fft_min_index_y': fft_min_index_y,
    'fft_min_index_z': fft_min_index_z,
    'fft_max_index_x': fft_max_index_x,
    'fft_max_index_y': fft_max_index_y,
    'fft_max_index_z': fft_max_index_z,
    'fft_entropy_x': fft_entropy_x,
    'fft_entropy_y': fft_entropy_y,
    'fft_entropy_z': fft_entropy_z,
    'fft_skewness_x': fft_skewness_x,
    'fft_skewness_y': fft_skewness_y,
    'fft_skewness_z': fft_skewness_z,
    'fft_kurtosis_x': fft_kurtosis_x,
    'fft_kurtosis_y': fft_kurtosis_y,
    'fft_kurtosis_z': fft_kurtosis_z,
    'fft_interquartile_range_x': fft_interquartile_range_x,
    'fft_interquartile_range_y': fft_interquartile_range_y,
    'fft_interquartile_range_z': fft_interquartile_range_z,
    'fft_mean_absolute_deviation_x': fft_mean_absolute_deviation_x,
    'fft_mean_absolute_deviation_y': fft_mean_absolute_deviation_y,
    'fft_mean_absolute_deviation_z': fft_mean_absolute_deviation_z
}

In [64]:
data_with_new_features = pd.DataFrame(data)
data_with_new_features.head(2)

Unnamed: 0,target,mean_x,mean_y,mean_z,variance_x,variance_y,variance_z,std_x,std_y,std_z,...,fft_skewness_z,fft_kurtosis_x,fft_kurtosis_y,fft_kurtosis_z,fft_interquartile_range_x,fft_interquartile_range_y,fft_interquartile_range_z,fft_mean_absolute_deviation_x,fft_mean_absolute_deviation_y,fft_mean_absolute_deviation_z
0,idle,0.502162,0.502016,0.622623,0.000164,0.000205,2.53409e-05,0.012805,0.014301,0.005034,...,5.181363,24.909131,24.550209,24.913634,0.214499,0.257444,0.164945,0.977613,0.693352,0.938221
1,idle,0.498743,0.498477,0.624667,2e-06,1e-06,9.869336e-08,0.001553,0.001016,0.000314,...,5.181363,24.909131,24.550209,24.913634,0.214499,0.257444,0.164945,0.977613,0.693352,0.938221


In [65]:
data_with_new_features.shape

(6462, 61)

### Split data into train dataset and test dataset

In [90]:
X = data_with_new_features.iloc[:, 1:]
y = data_with_new_features['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [67]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((5169, 60), (1293, 60), (5169,), (1293,))

In [68]:
train_data = pd.concat((X_train, y_train), axis = 1)
test_data = pd.concat((X_test, y_test), axis = 1)

## With new features, with selection

### SVC Linear

In [73]:
feature_selection_svc = Pipeline([
    ('chi-square', SelectKBest(chi2, k=10) ),
    ('mutual_info_classif', SelectKBest(score_func=mutual_info_classif, k=10)),
    ('f_classif', SelectKBest(score_func=f_classif, k=10)),
    ('svc', SVC())
])

In [74]:
feature_selection_svc.fit(X_train, y_train)
svc_prediction = feature_selection_svc.predict(X_test)

In [75]:
from sklearn.metrics import confusion_matrix
confusion_matrix = confusion_matrix(y_test, svc_prediction)
print(confusion_matrix)
print(classification_report(y_test, svc_prediction, zero_division=1))

[[220   0   0   0]
 [  0 689   0   0]
 [  0   0   5  25]
 [  0   0   0 354]]
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00       220
     running       1.00      1.00      1.00       689
      stairs       1.00      0.17      0.29        30
     walking       0.93      1.00      0.97       354

    accuracy                           0.98      1293
   macro avg       0.98      0.79      0.81      1293
weighted avg       0.98      0.98      0.97      1293



### Decision Tree

In [93]:
feature_selection_tree = Pipeline([
    ('chi-square', SelectKBest(chi2, k=15) ),
    ('mutual_info_classif', SelectKBest(score_func=mutual_info_classif, k=15)),
    ('f_classif', SelectKBest(score_func=f_classif, k=15)),
    ('tree', RandomForestClassifier(n_estimators=10, max_depth=10, random_state=42))
])

In [94]:
feature_selection_tree.fit(X_train, y_train)
tree_prediction = feature_selection_tree.predict(X_test)

In [95]:
from sklearn.metrics import confusion_matrix
confusion_matrix = confusion_matrix(y_test, tree_prediction)
print(confusion_matrix)
print(classification_report(y_test, tree_prediction, zero_division=1))

[[220   0   0   0]
 [  0 689   0   0]
 [  0   0  23   7]
 [  0   0   4 350]]
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00       220
     running       1.00      1.00      1.00       689
      stairs       0.85      0.77      0.81        30
     walking       0.98      0.99      0.98       354

    accuracy                           0.99      1293
   macro avg       0.96      0.94      0.95      1293
weighted avg       0.99      0.99      0.99      1293

