In [95]:
from google.colab import drive

drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


# Open data

In [96]:
mypath = '/content/gdrive/MyDrive/data'

In [97]:
from os import listdir
from os.path import isfile, join
import numpy as np
import pandas as pd

activities = listdir(mypath)

In [98]:
activities

['idle', 'walking', 'running', 'stairs']

In [99]:
for activity in activities:
  path = join(mypath, activity)
  frames = listdir(path)
  print(activity, ': ', len(frames))

idle :  1039
walking :  1850
running :  3408
stairs :  165


In [100]:
frames = listdir(join(mypath, 'running'))
frame = pd.read_csv(join(join(mypath, 'running'), frames[5]))
frame

Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z
0,7.762002,23.898922,5.396531
1,3.160346,19.79526,5.358224
2,3.634398,-11.267113,-4.338293
3,-1.92015,8.815451,1.163582
4,21.83512,28.787882,2.221819
5,5.913678,5.683835,-3.902549
6,-4.429273,7.513005,3.605668
7,6.579266,25.301924,-3.404555
8,3.562572,-5.880159,-5.554548
9,-3.931279,10.103531,-4.711789


# Data preparating

In [101]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

from scipy.fftpack import fft, rfft

from sklearn import preprocessing

### Feature calculation functions

In [102]:
features = [
            'skew_X',
            'skew_Y',
            'skew_Z',
            'kurtosis_X',
            'kurtosis_Y',
            'kurtosis_Z',
            'max_X',
            'max_Y',
            'max_Z',
            'min_X',
            'min_Y',
            'min_Z',
            'mean_X',
            'mean_Y',
            'mean_Z',
            'std_X',
            'std_Y',
            'std_Z',
            'variance_X',
            'variance_Y',
            'variance_Z',
            'median_X',
            'median_Y',
            'median_Z',
            'index_max_X',
            'index_max_Y',
            'index_max_Z',
            'index_min_X',
            'index_min_Y',
            'index_min_Z',
            'correlation_X_Y',
            'correlation_X_Z',
            'correlation_Y_Z',
            'mae_X', 'mae_Y','mae_Z', 'rmse_X', 'rmse_Y', 'rmse_Z']

len(features)

39

Create function that will be calculate all statistic features



In [103]:
def get_stat_features(frame):

  features = []
  features = np.array(features)

  features = np.concatenate((features, frame.skew(axis=0).values), axis=0)

  features = np.concatenate((features, frame.kurt(axis=0).values), axis=0)

  features = np.concatenate((features, frame.max(axis=0).values), axis=0)

  features = np.concatenate((features, frame.min(axis=0).values), axis=0)

  features = np.concatenate((features, frame.mean(axis=0).values), axis=0)

  features = np.concatenate((features, frame.std(axis=0).values), axis=0)

  features = np.concatenate((features, frame.var(axis=0).values), axis=0)

  features = np.concatenate((features, frame.median(axis=0).values), axis=0)

  features = np.concatenate((features, frame.idxmax(axis=0).values), axis=0)

  features = np.concatenate((features, frame.idxmin(axis=0).values), axis=0)

  correlations = frame.corr()

  corr = np.array([correlations['accelerometer_X']['accelerometer_Y'], correlations['accelerometer_X']['accelerometer_Z'], correlations['accelerometer_Z']['accelerometer_Y']])

  features = np.concatenate((features, corr), axis=0)

  frame['mean_X'] = frame.mean(axis=0)['accelerometer_X']
  frame['mean_Y'] = frame.mean(axis=0)['accelerometer_Y']
  frame['mean_Z'] = frame.mean(axis=0)['accelerometer_Z']

  mae_X = mean_absolute_error(frame['accelerometer_X'], frame['mean_X'])
  mae_Y = mean_absolute_error(frame['accelerometer_Y'], frame['mean_Y'])
  mae_Z = mean_absolute_error(frame['accelerometer_Z'], frame['mean_Z'])

  rmse_x = np.sqrt(mean_squared_error(frame['accelerometer_X'], frame['mean_X']))
  rmse_y = np.sqrt(mean_squared_error(frame['accelerometer_Y'], frame['mean_Y']))
  rmse_z = np.sqrt(mean_squared_error(frame['accelerometer_Z'], frame['mean_Z']))

  metrics = np.array([mae_X, mae_Y, mae_Z, rmse_x, rmse_y, rmse_z])

  features = np.concatenate((features, metrics), axis=0)

  return features

In [104]:
len(get_stat_features(frame))

39

### Discover calculation of features

In [107]:
a = np.array([2])

In [108]:
frame.skew(axis=0).values

a = np.concatenate((a, frame.skew(axis=0).values), axis=0)

a

array([ 2.        ,  0.8630012 , -0.4174396 , -0.46333786,  0.        ,
        0.        ,  0.        ])

In [109]:
frame.kurt(axis=0)

accelerometer_X   -0.174712
accelerometer_Y   -0.743597
accelerometer_Z   -0.671174
mean_X             0.000000
mean_Y             0.000000
mean_Z             0.000000
dtype: float64

In [110]:
frame.max(axis=0)

accelerometer_X    24.554934
accelerometer_Y    32.182858
accelerometer_Z     6.569690
mean_X              5.761088
mean_Y              9.552865
mean_Z             -0.449471
dtype: float64

In [111]:
frame.min(axis=0)

accelerometer_X    -6.311115
accelerometer_Y   -16.630125
accelerometer_Z   -12.808979
mean_X              5.761088
mean_Y              9.552865
mean_Z             -0.449471
dtype: float64

In [112]:
frame.mean(axis=0)

accelerometer_X    5.761088
accelerometer_Y    9.552865
accelerometer_Z   -0.449471
mean_X             5.761088
mean_Y             9.552865
mean_Z            -0.449471
dtype: float64

In [113]:
frame.std(axis=0)

accelerometer_X    8.805071e+00
accelerometer_Y    1.367332e+01
accelerometer_Z    5.278808e+00
mean_X             2.710086e-15
mean_Y             1.806724e-15
mean_Z             0.000000e+00
dtype: float64

In [114]:
frame.var(axis=0)

accelerometer_X    7.752928e+01
accelerometer_Y    1.869597e+02
accelerometer_Z    2.786582e+01
mean_X             7.344567e-30
mean_Y             3.264252e-30
mean_Z             0.000000e+00
dtype: float64

In [115]:
frame.idxmax(axis=0)

accelerometer_X    14
accelerometer_Y    14
accelerometer_Z    21
mean_X              0
mean_Y              0
mean_Z              0
dtype: int64

In [116]:
frame.idxmin(axis=0)

accelerometer_X    18
accelerometer_Y    25
accelerometer_Z    22
mean_X              0
mean_Y              0
mean_Z              0
dtype: int64

In [117]:
frame.corr()['accelerometer_X']['accelerometer_Y']

0.5273037588284037

In [118]:
frame.median(axis=0)

accelerometer_X     3.368642
accelerometer_Y    10.115502
accelerometer_Z     0.138864
mean_X              5.761088
mean_Y              9.552865
mean_Z             -0.449471
dtype: float64

In [119]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [120]:
new_frame = frame

In [121]:
new_frame['mean_X'] = frame.mean(axis=0)['accelerometer_X']
new_frame['mean_Y'] = frame.mean(axis=0)['accelerometer_Y']
new_frame['mean_Z'] = frame.mean(axis=0)['accelerometer_Z']

new_frame

Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z,mean_X,mean_Y,mean_Z
0,7.762002,23.898922,5.396531,5.761088,9.552865,-0.449471
1,3.160346,19.79526,5.358224,5.761088,9.552865,-0.449471
2,3.634398,-11.267113,-4.338293,5.761088,9.552865,-0.449471
3,-1.92015,8.815451,1.163582,5.761088,9.552865,-0.449471
4,21.83512,28.787882,2.221819,5.761088,9.552865,-0.449471
5,5.913678,5.683835,-3.902549,5.761088,9.552865,-0.449471
6,-4.429273,7.513005,3.605668,5.761088,9.552865,-0.449471
7,6.579266,25.301924,-3.404555,5.761088,9.552865,-0.449471
8,3.562572,-5.880159,-5.554548,5.761088,9.552865,-0.449471
9,-3.931279,10.103531,-4.711789,5.761088,9.552865,-0.449471


In [122]:
mean_absolute_error(frame['accelerometer_X'], new_frame['mean_X'])

6.917731973333334

In [123]:
np.sqrt(mean_squared_error(frame['accelerometer_X'], new_frame['mean_X']))

8.657076560407173

In [125]:
frame

Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z,mean_X,mean_Y,mean_Z
0,7.762002,23.898922,5.396531,5.761088,9.552865,-0.449471
1,3.160346,19.79526,5.358224,5.761088,9.552865,-0.449471
2,3.634398,-11.267113,-4.338293,5.761088,9.552865,-0.449471
3,-1.92015,8.815451,1.163582,5.761088,9.552865,-0.449471
4,21.83512,28.787882,2.221819,5.761088,9.552865,-0.449471
5,5.913678,5.683835,-3.902549,5.761088,9.552865,-0.449471
6,-4.429273,7.513005,3.605668,5.761088,9.552865,-0.449471
7,6.579266,25.301924,-3.404555,5.761088,9.552865,-0.449471
8,3.562572,-5.880159,-5.554548,5.761088,9.552865,-0.449471
9,-3.931279,10.103531,-4.711789,5.761088,9.552865,-0.449471


In [126]:
frame['accelerometer_X'].values

array([ 7.762002,  3.160346,  3.634398, -1.92015 , 21.83512 ,  5.913678,
       -4.429273,  6.579266,  3.562572, -3.931279, 22.816742,  8.714894,
       -3.68707 ,  1.412579, 24.554934, -2.94008 , 10.026917, 22.036232,
       -6.311115,  2.480393, -3.069367, 14.111424,  1.762132, 18.818426,
        3.174712,  0.45011 ,  8.710106,  6.78038 ,  0.521936,  0.301669])

In [127]:
from sklearn import preprocessing

preprocessing.normalize([rfft(frame['accelerometer_X'].values)])[0]

array([ 0.68513244, -0.08134821,  0.00125132,  0.00701579,  0.0217799 ,
       -0.0961322 , -0.09580618,  0.03365516, -0.08052785, -0.11034174,
        0.2103509 ,  0.06070956, -0.01836279,  0.11310969, -0.12442588,
        0.04229043,  0.06483825,  0.31719531, -0.26586656, -0.29137205,
        0.07598014, -0.0076403 ,  0.06543112, -0.09671822,  0.09817477,
        0.16570724, -0.13916759,  0.04521385,  0.25939159,  0.03526837])

# DataFrame

In [134]:
all_data = []

for activity in activities:
    activity_path = join(mypath, activity)
    for item in listdir(activity_path):
        frame = pd.read_csv(join(activity_path, item))
        example = frame.values.flatten()
        all_data.append(np.append(example, activity))

all_data_df = pd.DataFrame(all_data)

all_data_df.rename(columns={all_data_df.columns[-1]: 'label'}, inplace=True)

all_data_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,81,82,83,84,85,86,87,88,89,label
0,0.287304,-0.1005559999999999,9.768343,0.253785,-0.081403,9.758766,0.272939,-0.062249,9.763555,0.296881,...,0.292093,-0.033519,9.744401,0.268151,-0.0239419999999999,9.744401,0.272939,-0.105345,9.787497,idle
1,0.263362,-0.014365,9.787497,0.2489969999999999,-0.033519,9.77792,0.253785,-0.019154,9.782708,0.272939,...,0.2489969999999999,-0.014365,9.768343,0.244209,-0.019154,9.768343,0.2585739999999999,-0.0478839999999999,9.749189,idle
2,-0.679953,4.194642,8.446744,-0.215478,3.888184,9.045294,-0.134075,4.242526,8.743625,-1.426944,...,-0.1819589999999999,4.386178,8.456321,-0.435745,3.921703,8.303091,-0.052672,4.783615,7.881711999999999,idle
3,2.686294,5.544971,7.699753,1.177947,5.348647,8.010999,0.8379709999999999,5.315128,7.991845,0.6033390000000001,...,2.5187,4.046201,8.599973,-0.229843,3.241749,10.85531,-0.201113,1.675941,9.184157,idle
4,0.430956,4.515465,8.743625,-1.8579,5.4396260000000005,6.363788,-0.234632,5.602432,7.742849,-0.3830719999999999,...,-1.240196,6.454768,6.641516,-0.282516,6.828264,6.785168,0.339977,7.015011,7.086836999999999,idle


In [137]:
all_data_df.shape

(6462, 91)

In [146]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [147]:
X = all_data_df.drop('label', axis=1)
y = all_data_df['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

## Normalize

In [148]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Learning SVM model

In [149]:
svm_model = svm.SVC(kernel='rbf')
svm_model.fit(X_train_scaled, y_train)

## Learning Random Forest model

In [150]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)

## Assessment of models

In [151]:
svm_predictions = svm_model.predict(X_test_scaled)
rf_predictions = rf_model.predict(X_test_scaled)

print("SVM Classification Report:")
print(classification_report(y_test, svm_predictions))

print("Random Forest Classification Report:")
print(classification_report(y_test, rf_predictions))

SVM Classification Report:
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00       317
     running       1.00      1.00      1.00      1022
      stairs       0.89      0.39      0.54        44
     walking       0.95      1.00      0.97       556

    accuracy                           0.98      1939
   macro avg       0.96      0.85      0.88      1939
weighted avg       0.98      0.98      0.98      1939

Random Forest Classification Report:
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00       317
     running       1.00      1.00      1.00      1022
      stairs       1.00      0.07      0.13        44
     walking       0.93      1.00      0.96       556

    accuracy                           0.98      1939
   macro avg       0.98      0.77      0.77      1939
weighted avg       0.98      0.98      0.97      1939

