# Experiments with Thrust 2 datasets

In [40]:
import numpy as np
import pandas as pd
from lazypredict.Supervised import LazyClassifier
from joblib import parallel_backend

In [30]:
# Load the whole workbook at once: with sheet_name=None pandas returns a dict
# that maps each sheet name to its own DataFrame.
dfs = pd.read_excel(
    "../data/Thrust 2 Data Collections/lifting_data_simplified.xlsx",
    sheet_name=None,
)

## Test/train and feature/target selection

In [42]:
def experiment(test_trial = "trial1", 
               target = "rule1",
               features = slice("pelvis_ax", None)):
    """Benchmark LazyClassifier's model suite on a leave-trial(s)-out split.

    The per-sheet DataFrames in the module-level ``dfs`` dict are split by
    key: sheets named in ``test_trial`` form the test set, every other sheet
    goes into the training set.

    Parameters
    ----------
    test_trial : str or iterable of str
        Key(s) of ``dfs`` to hold out for testing. Names are matched exactly
        (not as substrings), so ``"trial10"`` does not also select
        ``"trial1"``.
    target : label
        Column used as the classification target.
    features : label, list of labels, or slice
        Column selector handed to ``DataFrame.loc``. The default takes every
        column from ``"pelvis_ax"`` through the last column.

    Returns
    -------
    tuple
        ``(models, predictions)`` exactly as returned by
        ``LazyClassifier.fit``.

    Raises
    ------
    ValueError
        If the split leaves either the test set or the training set without
        any sheet.
    """
    # Normalise to a set of names. With a bare string, `trial in test_trial`
    # would be a substring test and could pull extra sheets into the test set.
    if isinstance(test_trial, str):
        held_out = {test_trial}
    else:
        held_out = set(test_trial)

    train_frames = [df for trial, df in dfs.items() if trial not in held_out]
    test_frames = [df for trial, df in dfs.items() if trial in held_out]

    if not test_frames:
        raise ValueError(
            f"test_trial={test_trial!r} matches no sheet; available: {list(dfs)}"
        )
    if not train_frames:
        raise ValueError(
            f"test_trial={test_trial!r} holds out every sheet; nothing left to train on"
        )

    df_train = pd.concat(train_frames, ignore_index=True)
    df_test = pd.concat(test_frames, ignore_index=True)

    # NOTE(review): the default feature slice is open-ended; confirm that the
    # target column does not sit inside that range, otherwise it leaks into X.
    X_train, y_train = df_train.loc[:, features].values, df_train.loc[:, target].values
    X_test, y_test = df_test.loc[:, features].values, df_test.loc[:, target].values

    clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
    # Run the fits under joblib's threading backend using all available cores.
    with parallel_backend('threading', n_jobs=-1):
        models, predictions = clf.fit(X_train, X_test, y_train, y_test)

    return models, predictions

## All features -> rule1

In [43]:
# Hold out "trial1" and predict rule1; the feature selection is left at the
# function's default.
models, _ = experiment(test_trial="trial1", target="rule1")
print(models)

100%|██████████| 29/29 [1:42:05<00:00, 211.23s/it]   

                               Accuracy  Balanced Accuracy  ROC AUC  F1 Score  \
Model                                                                           
SVC                                0.83               0.84     0.84      0.84   
SGDClassifier                      0.86               0.83     0.83      0.86   
LogisticRegression                 0.84               0.82     0.82      0.84   
CalibratedClassifierCV             0.84               0.82     0.82      0.85   
LGBMClassifier                     0.81               0.82     0.82      0.81   
LinearSVC                          0.85               0.82     0.82      0.85   
RandomForestClassifier             0.81               0.81     0.81      0.81   
LinearDiscriminantAnalysis         0.85               0.81     0.81      0.85   
RidgeClassifier                    0.85               0.81     0.81      0.85   
RidgeClassifierCV                  0.85               0.81     0.81      0.85   
XGBClassifier               




## All features -> rule2

In [44]:
# Hold out "trial1" and predict rule2; the feature selection is left at the
# function's default.
models, _ = experiment(test_trial="trial1", target="rule2")
print(models)

100%|██████████| 29/29 [1:39:53<00:00, 206.67s/it]   

                               Accuracy  Balanced Accuracy  ROC AUC  F1 Score  \
Model                                                                           
SVC                                0.80               0.79     0.79      0.81   
XGBClassifier                      0.77               0.76     0.76      0.78   
SGDClassifier                      0.82               0.76     0.76      0.82   
LGBMClassifier                     0.77               0.76     0.76      0.78   
RandomForestClassifier             0.77               0.75     0.75      0.78   
LogisticRegression                 0.81               0.74     0.74      0.81   
CalibratedClassifierCV             0.81               0.74     0.74      0.81   
LinearSVC                          0.81               0.74     0.74      0.81   
LinearDiscriminantAnalysis         0.80               0.74     0.74      0.80   
QuadraticDiscriminantAnalysis      0.73               0.74     0.74      0.74   
ExtraTreesClassifier        




## Accelerometer only -> rule1

In [45]:
# Predict rule1 from only the three pelvis_a* columns; "trial1" is held out.
models, _ = experiment(
    test_trial="trial1",
    target="rule1",
    features=["pelvis_ax", "pelvis_ay", "pelvis_az"],
)
print(models)

100%|██████████| 29/29 [2:54:39<00:00, 361.36s/it]   

                               Accuracy  Balanced Accuracy  ROC AUC  F1 Score  \
Model                                                                           
QuadraticDiscriminantAnalysis      0.68               0.67     0.67      0.69   
GaussianNB                         0.68               0.66     0.66      0.69   
XGBClassifier                      0.74               0.66     0.66      0.73   
LGBMClassifier                     0.74               0.65     0.65      0.72   
RandomForestClassifier             0.73               0.65     0.65      0.72   
ExtraTreesClassifier               0.73               0.65     0.65      0.72   
KNeighborsClassifier               0.72               0.64     0.64      0.71   
AdaBoostClassifier                 0.73               0.64     0.64      0.71   
NuSVC                              0.70               0.63     0.63      0.70   
BaggingClassifier                  0.72               0.63     0.63      0.70   
SVC                         




## Accelerometer only -> rule2

In [46]:
# Predict rule2 from only the three pelvis_a* columns; "trial1" is held out.
models, _ = experiment(
    test_trial="trial1",
    target="rule2",
    features=["pelvis_ax", "pelvis_ay", "pelvis_az"],
)
print(models)

100%|██████████| 29/29 [2:37:23<00:00, 325.62s/it]   

                               Accuracy  Balanced Accuracy  ROC AUC  F1 Score  \
Model                                                                           
QuadraticDiscriminantAnalysis      0.68               0.63     0.63      0.70   
GaussianNB                         0.68               0.63     0.63      0.70   
KNeighborsClassifier               0.73               0.59     0.59      0.71   
LGBMClassifier                     0.75               0.59     0.59      0.72   
XGBClassifier                      0.75               0.59     0.59      0.72   
RandomForestClassifier             0.74               0.59     0.59      0.71   
ExtraTreesClassifier               0.74               0.58     0.58      0.71   
DecisionTreeClassifier             0.69               0.58     0.58      0.68   
ExtraTreeClassifier                0.69               0.58     0.58      0.68   
BaggingClassifier                  0.73               0.57     0.57      0.70   
AdaBoostClassifier          


