In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from factor_analyzer import FactorAnalyzer
from sklearn.decomposition import FactorAnalysis
from sklearn.preprocessing import StandardScaler
from mpl_toolkits.mplot3d import Axes3D
from sklearn.pipeline import Pipeline

In [2]:
train = pd.read_csv("./input/Train/train.csv")
test = pd.read_csv("./input/Test/test.csv")

print("shape of train: ", train.shape, "shape of test: ", test.shape)

shape of train:  (7767, 563) shape of test:  (3162, 563)


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [231]:
# First, we need to standardize the data to get better performance
features = train.columns.values.tolist()
features.remove('Activity')
features.remove('subject')

# Separating out the features
x = train.loc[:, features].values
test_features = test.loc[:, features].values

# Separating out the target
y = train.loc[:,['Activity']].values
test_labels = test.loc[:,['Activity']].values

train_features, validation_features, train_labels, validation_labels = train_test_split(x, y, test_size=0.25, random_state=1300)

train_labels = pd.DataFrame(train_labels, columns=['Activity'])
validation_labels = pd.DataFrame(validation_labels, columns=['Activity'])
print(train_features.shape)
print(test_features.shape)

(5825, 561)
(3162, 561)


In [6]:
from sklearn.neural_network import MLPClassifier

In [195]:
mlp = MLPClassifier(hidden_layer_sizes=(10, 10), max_iter=1000, solver='adam', activation='relu', verbose=False)

In [196]:
mlp.fit(train_features, np.ravel(train_labels))

MLPClassifier(hidden_layer_sizes=(10, 10), max_iter=1000)

In [197]:
pred_train = mlp.predict(train_features)
pred_train

array(['WALKING', 'WALKING_DOWNSTAIRS', 'LAYING', ..., 'WALKING',
       'STANDING', 'WALKING_DOWNSTAIRS'], dtype='<U18')

In [198]:
from sklearn.metrics import classification_report, confusion_matrix

In [199]:
confusion_matrix(train_labels, pred_train)

array([[1049,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0],
       [   0,   36,    8,    0,    0,    0,    0,    0,    0,    0,    0,
           0],
       [   1,   13,   25,    0,    0,    0,    0,    0,    0,    0,    0,
           0],
       [   0,    0,    0,  926,    0,    1,   30,    0,    0,    0,    0,
           0],
       [   0,    0,    0,    0,   43,    0,    0,   12,    0,    0,    0,
           0],
       [   0,    0,    0,    1,    0,   14,    0,    0,    1,    0,    0,
           0],
       [   0,    0,    0,    7,    0,    0, 1059,    0,    0,    0,    0,
           0],
       [   0,    0,    2,    0,    3,    0,    0,   64,    1,    0,    0,
           0],
       [   0,    0,    0,    0,    0,    0,    0,    1,   38,    0,    0,
           0],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0,  928,    0,
           0],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,  757,
           0],
       [   0,    0,  

In [200]:
print(classification_report(train_labels, pred_train, zero_division=0))

                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00      1049
        LIE_TO_SIT       0.73      0.82      0.77        44
      LIE_TO_STAND       0.71      0.64      0.68        39
           SITTING       0.99      0.97      0.98       957
        SIT_TO_LIE       0.93      0.78      0.85        55
      SIT_TO_STAND       0.93      0.88      0.90        16
          STANDING       0.97      0.99      0.98      1066
      STAND_TO_LIE       0.83      0.91      0.87        70
      STAND_TO_SIT       0.95      0.97      0.96        39
           WALKING       1.00      1.00      1.00       928
WALKING_DOWNSTAIRS       1.00      1.00      1.00       757
  WALKING_UPSTAIRS       1.00      1.00      1.00       805

          accuracy                           0.99      5825
         macro avg       0.92      0.91      0.92      5825
      weighted avg       0.99      0.99      0.99      5825



In [201]:
pred = mlp.predict(validation_features)
pred

array(['STANDING', 'WALKING_UPSTAIRS', 'SITTING', ...,
       'WALKING_DOWNSTAIRS', 'LAYING', 'LAYING'], dtype='<U18')

In [202]:
confusion_matrix(validation_labels, pred)

array([[360,   0,   2,   1,   1,   0,   0,   0,   0,   0,   0,   0],
       [  0,  14,   1,   0,   0,   0,   0,   1,   0,   0,   0,   0],
       [  0,   8,   8,   1,   0,   0,   0,   1,   0,   0,   0,   0],
       [  0,   0,   0, 311,   0,   0,  25,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,  10,   1,   1,   7,   1,   0,   0,   0],
       [  0,   0,   0,   1,   0,   5,   0,   0,   1,   0,   0,   0],
       [  0,   0,   0,   5,   0,   0, 352,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   6,   0,   0,  13,   1,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   8,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0, 297,   1,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   1, 229,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   2, 266]],
      dtype=int64)

In [203]:
print(classification_report(validation_labels, pred, zero_division=0))

                    precision    recall  f1-score   support

            LAYING       1.00      0.99      0.99       364
        LIE_TO_SIT       0.64      0.88      0.74        16
      LIE_TO_STAND       0.73      0.44      0.55        18
           SITTING       0.97      0.93      0.95       336
        SIT_TO_LIE       0.59      0.50      0.54        20
      SIT_TO_STAND       0.83      0.71      0.77         7
          STANDING       0.93      0.99      0.96       357
      STAND_TO_LIE       0.59      0.65      0.62        20
      STAND_TO_SIT       0.73      1.00      0.84         8
           WALKING       1.00      1.00      1.00       298
WALKING_DOWNSTAIRS       0.99      1.00      0.99       230
  WALKING_UPSTAIRS       1.00      0.99      1.00       268

          accuracy                           0.96      1942
         macro avg       0.83      0.84      0.83      1942
      weighted avg       0.97      0.96      0.96      1942



In [204]:
# First, we need to standardize the data to get better performance
test_features = test.columns.values.tolist()
test_features.remove('Activity')
test_features.remove('subject')

# Separating out the features
x_test = test.loc[:, test_features].values

# Separating out the target
y_test = test.loc[:,['Activity']].values

print(x_test.shape)

(3162, 561)


In [205]:
result = mlp.predict(x_test)
result

array(['STANDING', 'STANDING', 'STANDING', ..., 'WALKING_UPSTAIRS',
       'WALKING_UPSTAIRS', 'WALKING_UPSTAIRS'], dtype='<U18')

In [206]:
confusion_matrix(y_test, result)

array([[539,   0,   3,   0,   0,   0,   2,   0,   0,   0,   0,   1],
       [  0,  16,   8,   0,   0,   0,   0,   0,   1,   0,   0,   0],
       [  0,  13,  10,   0,   0,   0,   0,   1,   2,   0,   0,   1],
       [  0,   0,   0, 427,   0,   0,  77,   1,   3,   0,   0,   0],
       [  0,   0,   0,   0,  21,   0,   0,  10,   0,   0,   0,   1],
       [  0,   0,   0,   0,   0,   9,   0,   0,   1,   0,   0,   0],
       [  0,   0,   0,   8,   0,   0, 545,   0,   2,   1,   0,   0],
       [  0,   1,   0,   1,  10,   0,   2,  24,   9,   0,   0,   2],
       [  0,   0,   0,   2,   0,   2,   0,   3,  14,   1,   0,   1],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0, 479,   3,  14],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   9, 395,  16],
       [  0,   0,   0,   0,   0,   0,   0,   1,   3,  51,   3, 413]],
      dtype=int64)

In [208]:
print(classification_report(y_test, result, zero_division=0))

                    precision    recall  f1-score   support

            LAYING       1.00      0.99      0.99       545
        LIE_TO_SIT       0.53      0.64      0.58        25
      LIE_TO_STAND       0.48      0.37      0.42        27
           SITTING       0.97      0.84      0.90       508
        SIT_TO_LIE       0.68      0.66      0.67        32
      SIT_TO_STAND       0.82      0.90      0.86        10
          STANDING       0.87      0.98      0.92       556
      STAND_TO_LIE       0.60      0.49      0.54        49
      STAND_TO_SIT       0.40      0.61      0.48        23
           WALKING       0.89      0.97      0.92       496
WALKING_DOWNSTAIRS       0.99      0.94      0.96       420
  WALKING_UPSTAIRS       0.92      0.88      0.90       471

          accuracy                           0.91      3162
         macro avg       0.76      0.77      0.76      3162
      weighted avg       0.92      0.91      0.91      3162



### Train on selected features from RFE

In [218]:
rfe_features = np.array(['tBodyAcc-Mean-1                ', 'tBodyAcc-Mean-2                ',
 'tBodyAcc-Mean-3                ', 'tBodyAcc-STD-1                 ',
 'tBodyAcc-Mad-1                 ', 'tBodyAcc-Max-1                 ',
 'tBodyAcc-Max-2                 ', 'tBodyAcc-Min-1                 ',
 'tBodyAcc-SMA-1                 ', 'tBodyAcc-Energy-1              ',
 'tBodyAcc-Energy-2              ', 'tBodyAcc-IQR-1                 ',
 'tBodyAcc-ropy-1                ', 'tBodyAcc-ropy-1                .1',
 'tBodyAcc-ropy-1                .2', 'tBodyAcc-Correlation-1         ',
 'tBodyAcc-Correlation-2         ', 'tBodyAcc-Correlation-3         ',
 'tGravityAcc-Mean-1             ', 'tGravityAcc-Mean-2             ',
 'tGravityAcc-Mean-3             ', 'tGravityAcc-STD-1              ',
 'tGravityAcc-STD-2              ', 'tGravityAcc-STD-3              ',
 'tGravityAcc-Mad-1              ', 'tGravityAcc-Mad-2              ',
 'tGravityAcc-Mad-3              ', 'tGravityAcc-Max-1              ',
 'tGravityAcc-Max-2              ', 'tGravityAcc-Max-3              ',
 'tGravityAcc-Min-1              ', 'tGravityAcc-Min-2              ',
 'tGravityAcc-Min-3              ', 'tGravityAcc-SMA-1              ',
 'tGravityAcc-Energy-1           ', 'tGravityAcc-Energy-2           ',
 'tGravityAcc-Energy-3           ', 'tGravityAcc-IQR-1              ',
 'tGravityAcc-IQR-2              ', 'tGravityAcc-ropy-1             ',
 'tGravityAcc-ropy-1             .1', 'tGravityAcc-ARCoeff-1          ',
 'tGravityAcc-ARCoeff-2          ', 'tGravityAcc-ARCoeff-3          ',
 'tGravityAcc-ARCoeff-4          ', 'tGravityAcc-ARCoeff-5          ',
 'tGravityAcc-ARCoeff-6          ', 'tGravityAcc-ARCoeff-7          ',
 'tGravityAcc-ARCoeff-8          ', 'tGravityAcc-ARCoeff-9          ',
 'tGravityAcc-ARCoeff-10         ', 'tGravityAcc-ARCoeff-11         ',
 'tGravityAcc-ARCoeff-12         ', 'tGravityAcc-Correlation-1      ',
 'tGravityAcc-Correlation-2      ', 'tGravityAcc-Correlation-3      ',
 'tBodyAccJerk-STD-1             ', 'tBodyAccJerk-STD-2             ',
 'tBodyAccJerk-Mad-1             ', 'tBodyAccJerk-Mad-2             ',
 'tBodyAccJerk-Max-1             ', 'tBodyAccJerk-Max-3             ',
 'tBodyAccJerk-SMA-1             ', 'tBodyAccJerk-Energy-1          ',
 'tBodyAccJerk-Energy-3          ', 'tBodyAccJerk-IQR-1             ',
 'tBodyAccJerk-IQR-2             ', 'tBodyAccJerk-IQR-3             ',
 'tBodyAccJerk-ropy-1            ', 'tBodyAccJerk-ropy-1            .1',
 'tBodyAccJerk-ropy-1            .2', 'tBodyAccJerk-Correlation-1     ',
 'tBodyGyro-Mean-1               ', 'tBodyGyro-Mean-3               ',
 'tBodyGyro-STD-1                ', 'tBodyGyro-STD-2                ',
 'tBodyGyro-STD-3                ', 'tBodyGyro-Mad-1                ',
 'tBodyGyro-Mad-2                ', 'tBodyGyro-Mad-3                ',
 'tBodyGyro-Max-1                ', 'tBodyGyro-Min-1                ',
 'tBodyGyro-SMA-1                ', 'tBodyGyro-Energy-1             ',
 'tBodyGyro-Energy-2             ', 'tBodyGyro-Energy-3             ',
 'tBodyGyro-IQR-1                ', 'tBodyGyro-IQR-2                ',
 'tBodyGyro-IQR-3                ', 'tBodyGyro-ropy-1               ',
 'tBodyGyro-ARCoeff-5            ', 'tBodyGyro-Correlation-1        ',
 'tBodyGyro-Correlation-2        ', 'tBodyGyro-Correlation-3        ',
 'tBodyGyroJerk-STD-1            ', 'tBodyGyroJerk-Mad-1            ',
 'tBodyGyroJerk-Mad-3            ', 'tBodyGyroJerk-Min-1            ',
 'tBodyGyroJerk-SMA-1            ', 'tBodyGyroJerk-Energy-1         ',
 'tBodyGyroJerk-IQR-1            ', 'tBodyGyroJerk-IQR-3            ',
 'tBodyGyroJerk-ropy-1           ', 'tBodyGyroJerk-ropy-1           .2',
 'tBodyGyroJerk-ARCoeff-1        ', 'tBodyGyroJerk-ARCoeff-3        ',
 'tBodyGyroJerk-Correlation-1    ', 'tBodyGyroJerk-Correlation-2    ',
 'tBodyAccMag-Mean-1             ', 'tBodyAccMag-STD-1              ',
 'tBodyAccMag-Mad-1              ', 'tBodyAccMag-Max-1              ',
 'tBodyAccMag-SMA-1              ', 'tBodyAccMag-Energy-1           ',
 'tBodyAccMag-ARCoeff-1          ', 'tBodyAccMag-ARCoeff-2          ',
 'tGravityAccMag-Mean-1          ', 'tGravityAccMag-STD-1           ',
 'tGravityAccMag-Mad-1           ', 'tGravityAccMag-Max-1           ',
 'tGravityAccMag-SMA-1           ', 'tGravityAccMag-Energy-1        ',
 'tGravityAccMag-ARCoeff-1       ', 'tGravityAccMag-ARCoeff-2       ',
 'tBodyAccJerkMag-Mean-1         ', 'tBodyAccJerkMag-STD-1          ',
 'tBodyAccJerkMag-Mad-1          ', 'tBodyAccJerkMag-SMA-1          ',
 'tBodyAccJerkMag-Energy-1       ', 'tBodyAccJerkMag-IQR-1          ',
 'tBodyAccJerkMag-ropy-1         ', 'tBodyGyroMag-Mean-1            ',
 'tBodyGyroMag-SMA-1             ', 'tBodyGyroJerkMag-Mean-1        ',
 'fBodyAcc-Mean-1                ', 'fBodyAcc-STD-1                 ',
 'fBodyAcc-Mad-1                 ', 'fBodyAcc-Max-1                 ',
 'fBodyAcc-Max-2                 ', 'fBodyAcc-Energy-1              ',
 'fBodyAcc-MaxInds-1             ', 'fBodyAcc-MeanFreq-3            ',
 'fBodyAcc-Skewness-1            ', 'fBodyAcc-Kurtosis-1            ',
 'fBodyAcc-Skewness-1            .1', 'fBodyAcc-Kurtosis-1            .1',
 'fBodyAcc-BandsEnergyOld-1      ', 'fBodyAcc-BandsEnergyOld-2      ',
 'fBodyAcc-BandsEnergyOld-9      ', 'fBodyAcc-BandsEnergyOld-13     ',
 'fBodyAcc-BandsEnergyOld-15     ', 'fBodyAcc-BandsEnergyOld-29     ',
 'fBodyAcc-BandsEnergyOld-31     ', 'fBodyAcc-BandsEnergyOld-38     ',
 'fBodyAccJerk-Mean-1            ', 'fBodyAccJerk-STD-1             ',
 'fBodyAccJerk-STD-2             ', 'fBodyAccJerk-Mad-1             ',
 'fBodyAccJerk-Max-1             ', 'fBodyAccJerk-SMA-1             ',
 'fBodyAccJerk-Energy-1          ', 'fBodyAccJerk-ropy-1            ',
 'fBodyAccJerk-MaxInds-1         ', 'fBodyAccJerk-MaxInds-3         ',
 'fBodyAccJerk-BandsEnergyOld-1  ', 'fBodyAccJerk-BandsEnergyOld-9  ',
 'fBodyAccJerk-BandsEnergyOld-13 ', 'fBodyAccJerk-BandsEnergyOld-29 ',
 'fBodyAccJerk-BandsEnergyOld-31 ', 'fBodyAccJerk-BandsEnergyOld-38 ',
 'fBodyGyro-Mean-1               ', 'fBodyGyro-Mean-2               ',
 'fBodyGyro-STD-1                ', 'fBodyGyro-STD-2                ',
 'fBodyGyro-STD-3                ', 'fBodyGyro-Mad-1                ',
 'fBodyGyro-Mad-2                ', 'fBodyGyro-Max-1                ',
 'fBodyGyro-Max-2                ', 'fBodyGyro-Max-3                ',
 'fBodyGyro-Energy-1             ', 'fBodyGyro-Energy-2             ',
 'fBodyGyro-Energy-3             ', 'fBodyGyro-ropy-1               ',
 'fBodyGyro-ropy-1               .1', 'fBodyGyro-MaxInds-1            ',
 'fBodyGyro-MaxInds-2            ', 'fBodyGyro-MaxInds-3            ',
 'fBodyGyro-MeanFreq-1           ', 'fBodyGyro-Skewness-1           .1',
 'fBodyGyro-Skewness-1           .2', 'fBodyGyro-Kurtosis-1           .2',
 'fBodyGyro-BandsEnergyOld-1     ', 'fBodyGyro-BandsEnergyOld-2     ',
 'fBodyGyro-BandsEnergyOld-9     ', 'fBodyGyro-BandsEnergyOld-13    ',
 'fBodyGyro-BandsEnergyOld-15    ', 'fBodyGyro-BandsEnergyOld-23    ',
 'fBodyGyro-BandsEnergyOld-27    ', 'fBodyGyro-BandsEnergyOld-29    ',
 'fBodyGyro-BandsEnergyOld-37    ', 'fBodyGyro-BandsEnergyOld-41    ',
 'fBodyAccMag-Mean-1             ', 'fBodyAccMag-STD-1              ',
 'fBodyAccMag-Mad-1              ', 'fBodyAccMag-Max-1              ',
 'fBodyAccMag-SMA-1              ', 'fBodyAccMag-Energy-1           ',
 'fBodyAccMag-IQR-1              ', 'fBodyAccMag-ropy-1             ',
 'fBodyAccMag-MaxInds-1          ', 'fBodyAccMag-MeanFreq-1         ',
 'fBodyAccJerkMag-STD-1          ', 'fBodyAccJerkMag-Max-1          ',
 'fBodyAccJerkMag-Energy-1       ', 'fBodyGyroMag-MeanFreq-1        ',
 'tBodyGyro-AngleWRTGravity-1    ', 'tXAxisAcc-AngleWRTGravity-1    ',
 'tYAxisAcc-AngleWRTGravity-1    ', 'tZAxisAcc-AngleWRTGravity-1    '])

rfe_features.shape

(220,)

### MLP on SMOTE Data

In [234]:
# Imports
import tensorflow
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

# Configuration options
feature_vector_length = 561
num_classes = 12

In [238]:
map = {
  "WALKING": 1,
  "WALKING_UPSTAIRS ": 2,
  "WALKING_DOWNSTAIRS": 3,
  "SITTING": 4,
  "STANDING": 5,
  "LAYING": 6,
  "STAND_TO_SIT": 7,
  "SIT_TO_STAND": 8,
  "SIT_TO_LIE": 9,
  "LIE_TO_SIT": 10,
  "STAND_TO_LIE": 11,
  "LIE_TO_STAND": 12
}


Unnamed: 0,Labels
0,5
1,5
2,5
3,5
4,5
...,...
7762,2
7763,2
7764,2
7765,2


In [233]:
X_train = train_features
X_test = test_features

Y_train = to_categorical(train_class, num_classes)

ValueError: invalid literal for int() with base 10: 'WALKING'

In [223]:
input_shape = (feature_vector_length,)

model = Sequential()
model.add(Dense(350, input_shape=input_shape, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

In [227]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(train_features, train_labels, epochs=10, batch_size=250, verbose=1, validation_split=0.2)

Epoch 1/10


ValueError: in user code:

    D:\Users\admin\lib\site-packages\keras\engine\training.py:853 train_function  *
        return step_function(self, iterator)
    D:\Users\admin\lib\site-packages\keras\engine\training.py:842 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    D:\Users\admin\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    D:\Users\admin\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    D:\Users\admin\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    D:\Users\admin\lib\site-packages\keras\engine\training.py:835 run_step  **
        outputs = model.train_step(data)
    D:\Users\admin\lib\site-packages\keras\engine\training.py:788 train_step
        loss = self.compiled_loss(
    D:\Users\admin\lib\site-packages\keras\engine\compile_utils.py:201 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    D:\Users\admin\lib\site-packages\keras\losses.py:141 __call__
        losses = call_fn(y_true, y_pred)
    D:\Users\admin\lib\site-packages\keras\losses.py:245 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    D:\Users\admin\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
        return target(*args, **kwargs)
    D:\Users\admin\lib\site-packages\keras\losses.py:1665 categorical_crossentropy
        return backend.categorical_crossentropy(
    D:\Users\admin\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
        return target(*args, **kwargs)
    D:\Users\admin\lib\site-packages\keras\backend.py:4839 categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)
    D:\Users\admin\lib\site-packages\tensorflow\python\framework\tensor_shape.py:1161 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (None, 1) and (None, 12) are incompatible
