In [1]:
from datetime import datetime
from pathlib import Path
import sklearn
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
import numpy as np
import pandas as pd

print(sklearn.__version__)

1.5.2


In [2]:
ACTIVITY_TAG_MAP = {
    1: "fall_forward_hands",
    2: "fall_forward_knees",
    3: "fall_backwards",
    4: "fall_sideward",
    5: "fall_sitting_chair",
    6: "walking",
    7: "standing",
    8: "sitting",
    9: "picking_object",
    10: "jumping",
    11: "laying",
}

In [3]:
def get_metrics(expected_y, predicted_y):
    return {
        "recall": metrics.recall_score(expected_y, predicted_y),
        "f1": metrics.f1_score(expected_y, predicted_y),
        "precision": metrics.precision_score(expected_y, predicted_y),
        "accuracy": metrics.accuracy_score(expected_y, predicted_y),
    }

In [4]:
models = {
    "KNN": {
        "estimator": KNN(metric="euclidean"),
        "params": {
            "n_neighbors": [3, 5, 7, 9, 11],
            "weights": ["uniform", "distance"],
        },
    },
    "RF": {
        "estimator": RandomForestClassifier(
            criterion="gini", oob_score=metrics.recall_score
        ),
        "params": {
            "n_estimators": [50, 100, 200],
            "max_features": ["sqrt", "log2"],
        },
    },
    # https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html
    "MLP": {
        "estimator": MLPClassifier(solver="adam", max_iter=1000),
        "params": {
            # "max_iter": [100, 500, 1000],  # number of epochs
            "hidden_layer_sizes": [
                (8,),
                (8, 8),
                (8, 16),
                (8, 32),
                (8, 64),
                (8, 128),
                (16,),
                (16, 8),
                (16, 16),
                (16, 32),
                (16, 64),
                (16, 128),
                (32,),
                (32, 8),
                (32, 16),
                (32, 32),
                (32, 64),
                (32, 128),
                (64,),
                (64, 8),
                (64, 16),
                (64, 32),
                (64, 64),
                (64, 128),
                (128,),
                (128, 8),
                (128, 16),
                (128, 32),
                (128, 64),
                (128, 128),
            ],
        },
    },
    "SVM": {
        "estimator": SVC(),
        "params": {"class_weight": [{0: 1, 1: 10}, {0: 1, 1: 20}, {0: 1, 1: 1}]},
    },
}

algorithm_options = ["KNN", "RF", "MLP", "SVM"]
# algorithm = "KNN"

In [5]:
datasets_path = Path.cwd() / "datasets"
columns_not_used_for_training = ["Timestamp", "Subject", "Trial", "Activity", "Tag"]

skf = StratifiedKFold(n_splits=5, shuffle=True)

In [None]:
for algorithm in algorithm_options:
    model = models[algorithm]

    gs_cv_model = GridSearchCV(
        model["estimator"],
        model["params"],
        scoring="recall",
        n_jobs=-1,
        cv=2,
    )
    output_dir_name = (
        "results/" + datetime.today().strftime("%Y-%m-%d_%H-%M_") + algorithm
    )

    output_dir = Path.cwd() / output_dir_name

    output_dir.mkdir(exist_ok=True, parents=True)


    training_info_output_file = output_dir / "_training_info.txt"


    training_info_output_file.write_text(f"{algorithm} - {model['params']}")


    output_file_path = output_dir / f"{algorithm}_training_results.csv"

    output_file = open(output_file_path, mode="w", encoding="utf-8")
    output_file.write(
        "sensor_position,features_domain,n_fold,recall,f1,precision,accuracy,best_params\n"
    )

    mistakes_file_path = output_dir / f"{algorithm}_mistakes.csv"
    mistakes_file = open(mistakes_file_path, mode="w", encoding="utf-8")
    mistakes_file.write(
        "sensor_position,features_domain,n_fold,"
        + ",".join(ACTIVITY_TAG_MAP.values())
        + "\n"
    )

    for dataset in datasets_path.iterdir():

        data = pd.read_csv(dataset, header=0).dropna()
        data = data[data["Tag"] != 20]

        X = data.drop(columns=columns_not_used_for_training)
        # X = data

        print(X.shape)


        y = pd.DataFrame(
            {"is_fall": [0 if row > 5 else 1 for row in data["Tag"]]}
        ).values.reshape(
            -1,
        )


        print(y.shape)

        # print(data.iloc[0])
        # print(data.at[data.index[0], "Tag"])

        # continue


        n_fold = 0

        sensor_position, features_domain = dataset.name.rstrip("dataset.csv").split(
            "_", 1
        )


        features_domain = "both" if not features_domain else features_domain.rstrip("_")


        for train, test in skf.split(X, y):

            X_train, X_test = X.iloc[train], X.iloc[test]


            y_train, y_test = y[train], y[test]


            classifier = gs_cv_model.fit(X_train, y_train)


            prediction = classifier.predict(X_test)

            # print(prediction)


            fold_metrics = get_metrics(y_test, prediction)

            output_file.write(
                f"{sensor_position},{features_domain},{n_fold},{fold_metrics['recall']},"
                f"{fold_metrics['f1']},{fold_metrics['precision']},{fold_metrics['accuracy']},\"{classifier.best_params_}\"\n"
            )

            print(
                f"{sensor_position},{features_domain},{n_fold},{fold_metrics['recall']},"
                f"{fold_metrics['f1']},{fold_metrics['precision']},{fold_metrics['accuracy']},\"{classifier.best_params_}\"\n"
            )

            fold_mistakes = {i: 0 for i in range(1, 12)}

            for i in range(len(prediction)):
                if prediction[i] != y_test[i]:
                    fold_mistakes[data.at[data.index[test[i]], "Tag"]] += 1

            mistakes_file.write(
                f"{sensor_position},{features_domain},{n_fold},"
                f"{','.join(str(value) for value in fold_mistakes.values())}\n"
            )

            print(
                f"{sensor_position},{features_domain},{n_fold},"
                f"{','.join(str(value) for value in fold_mistakes.values())}\n"
            )


            print(metrics.confusion_matrix(y_test, prediction))

            n_fold += 1


    output_file.close()
    mistakes_file.close()


# print(metrics.classification_report(expected_y, predicted_y))

(32113, 108)
(32113,)
Ankle,both,0,0.04712041884816754,0.0743801652892562,0.17647058823529413,0.9651253308422856,"{'n_neighbors': 3, 'weights': 'distance'}"

Ankle,both,0,36,34,39,24,49,28,0,2,2,4,6

[[6190   42]
 [ 182    9]]
Ankle,both,1,0.06282722513089005,0.09302325581395349,0.1791044776119403,0.9635684259691732,"{'n_neighbors': 3, 'weights': 'distance'}"

Ankle,both,1,22,29,45,39,44,25,4,2,4,13,7

[[6177   55]
 [ 179   12]]
Ankle,both,2,0.07329842931937172,0.11067193675889328,0.22580645161290322,0.9649696403549743,"{'n_neighbors': 3, 'weights': 'distance'}"

Ankle,both,2,36,34,38,21,48,22,1,1,2,15,7

[[6184   48]
 [ 177   14]]
Ankle,both,3,0.07329842931937172,0.11155378486055777,0.23333333333333334,0.9652756150731859,"{'n_neighbors': 3, 'weights': 'distance'}"

Ankle,both,3,40,33,37,26,41,26,1,3,0,11,5

[[6185   46]
 [ 177   14]]
Ankle,both,4,0.07853403141361257,0.12096774193548387,0.2631578947368421,0.9660541887262535,"{'n_neighbors': 3, 'weights': 'distance'}"

Ankle,both,4,27,3

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Ankle,both,4,0.0,0.0,0.0,0.9702584864528184,"{'hidden_layer_sizes': (32, 128)}"

Ankle,both,4,38,28,37,35,53,0,0,0,0,0,0

[[6231    0]
 [ 191    0]]
(32113, 36)
(32113,)
Ankle,frequency,0,0.5602094240837696,0.34076433121019106,0.2448512585812357,0.9355441382531527,"{'hidden_layer_sizes': (128,)}"

Ankle,frequency,0,13,10,13,21,27,145,1,1,0,53,130

[[5902  330]
 [  84  107]]
Ankle,frequency,1,0.03664921465968586,0.0625,0.21212121212121213,0.9673049976646427,"{'hidden_layer_sizes': (16, 16)}"

Ankle,frequency,1,29,40,34,36,45,4,0,2,0,1,19

[[6206   26]
 [ 184    7]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Ankle,frequency,2,0.0,0.0,0.0,0.9702631169235559,"{'hidden_layer_sizes': (8, 8)}"

Ankle,frequency,2,28,31,44,35,53,0,0,0,0,0,0

[[6232    0]
 [ 191    0]]
Ankle,frequency,3,0.12041884816753927,0.1464968152866242,0.18699186991869918,0.9582684521955777,"{'hidden_layer_sizes': (128, 64)}"

Ankle,frequency,3,38,33,36,28,33,69,0,0,0,0,31

[[6131  100]
 [ 168   23]]
Ankle,frequency,4,0.015706806282722512,0.02564102564102564,0.06976744186046512,0.9644970414201184,"{'hidden_layer_sizes': (32, 128)}"

Ankle,frequency,4,35,27,44,33,49,1,0,1,0,2,36

[[6191   40]
 [ 188    3]]
(32117, 72)
(32117,)
Ankle,time,0,0.41361256544502617,0.4514285714285714,0.4968553459119497,0.9701120797011208,"{'hidden_layer_sizes': (128,)}"

Ankle,time,0,23,16,28,21,24,38,8,9,4,4,17

[[6153   80]
 [ 112   79]]
Ankle,time,1,0.005235602094240838,0.010416666666666666,1.0,0.9704234122042341,"{'hidden_layer_sizes': (128, 16)}"

Ankle,time,1,37,25,41,32,55,0,0,0,0,0,0

[[6233    0]
 [ 190    1]]
Ankle,time,2,0.58115183246073

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Belt,time,4,0.0,0.0,0.0,0.9702631169235559,"{'hidden_layer_sizes': (32, 16)}"

Belt,time,4,34,34,47,35,41,0,0,0,0,0,0

[[6232    0]
 [ 191    0]]
(28324, 540)
(28324,)
full,both,0,0.4588235294117647,0.5909090909090909,0.8297872340425532,0.9809355692850839,"{'hidden_layer_sizes': (16,)}"

full,both,0,24,13,12,16,27,1,0,1,4,2,8

[[5479   16]
 [  92   78]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


full,both,1,0.0,0.0,0.0,0.969991173874669,"{'hidden_layer_sizes': (64, 16)}"

full,both,1,28,28,38,36,40,0,0,0,0,0,0

[[5495    0]
 [ 170    0]]
full,both,2,0.611764705882353,0.6624203821656051,0.7222222222222222,0.981288614298323,"{'hidden_layer_sizes': (32,)}"

full,both,2,7,9,17,12,21,11,7,3,4,2,13

[[5455   40]
 [  66  104]]
full,both,3,0.6374269005847953,0.6855345911949685,0.7414965986394558,0.9823477493380406,"{'hidden_layer_sizes': (32,)}"

full,both,3,7,16,11,11,17,18,4,2,1,0,13

[[5456   38]
 [  62  109]]
full,both,4,0.0,0.0,0.0,0.9698093220338984,"{'hidden_layer_sizes': (128, 32)}"

full,both,4,39,26,37,25,43,0,0,0,1,0,0

[[5493    1]
 [ 170    0]]
(32113, 36)
(32113,)
full,frequency,0,0.0,0.0,0.0,0.9690175930250662,"{'hidden_layer_sizes': (16, 8)}"

full,frequency,0,33,35,45,31,47,0,1,1,0,0,6

[[6224    8]
 [ 191    0]]
full,frequency,1,0.6178010471204188,0.14106395696353854,0.0796221322537112,0.7762727697337692,"{'hidden_layer_sizes': (64, 32)}"

full,frequency,1,21,18,13,1

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


full,frequency,3,0.0,0.0,0.0,0.9702584864528184,"{'hidden_layer_sizes': (8, 32)}"

full,frequency,3,35,21,45,34,56,0,0,0,0,0,0

[[6231    0]
 [ 191    0]]
full,frequency,4,0.05235602094240838,0.08333333333333333,0.20408163265306123,0.9657427592650265,"{'hidden_layer_sizes': (128,)}"

full,frequency,4,33,38,36,34,40,3,7,6,0,1,22

[[6192   39]
 [ 181   10]]
(32117, 72)
(32117,)
full,time,0,0.05759162303664921,0.10091743119266056,0.4074074074074074,0.9694894146948941,"{'hidden_layer_sizes': (128, 128)}"

full,time,0,26,30,38,46,40,2,2,3,0,4,5

[[6217   16]
 [ 180   11]]
full,time,1,0.193717277486911,0.26714801444043323,0.43023255813953487,0.9683997509339975,"{'hidden_layer_sizes': (16, 16)}"

full,time,1,31,21,33,20,49,5,9,4,1,3,27

[[6184   49]
 [ 154   37]]
full,time,2,0.1256544502617801,0.19591836734693877,0.4444444444444444,0.9693289739996886,"{'hidden_layer_sizes': (128,)}"

full,time,2,32,35,40,30,30,3,4,7,1,1,14

[[6202   30]
 [ 167   24]]
full,time,3,0.06282722513089005,0.10859728

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Neck,frequency,4,0.0,0.0,0.0,0.9702584864528184,"{'hidden_layer_sizes': (16, 64)}"

Neck,frequency,4,47,32,37,29,46,0,0,0,0,0,0

[[6231    0]
 [ 191    0]]
(32117, 72)
(32117,)
Neck,time,0,0.2617801047120419,0.38022813688212925,0.6944444444444444,0.974626400996264,"{'hidden_layer_sizes': (128, 64)}"

Neck,time,0,26,33,28,20,34,2,2,0,5,10,3

[[6211   22]
 [ 141   50]]
Neck,time,1,0.34554973821989526,0.3793103448275862,0.42038216560509556,0.9663760896637609,"{'hidden_layer_sizes': (64,)}"

Neck,time,1,26,25,26,17,31,16,12,3,3,23,34

[[6142   91]
 [ 125   66]]
Neck,time,2,0.041884816753926704,0.07766990291262135,0.5333333333333333,0.9704188074108672,"{'hidden_layer_sizes': (64, 16)}"

Neck,time,2,37,30,35,29,52,0,1,0,0,1,5

[[6225    7]
 [ 183    8]]
Neck,time,3,0.3403141361256545,0.38235294117647056,0.436241610738255,0.9673049976646427,"{'hidden_layer_sizes': (128,)}"

Neck,time,3,21,22,31,26,26,2,17,1,6,21,37

[[6148   84]
 [ 126   65]]
Neck,time,4,0.675392670157068,0.34172185430463575,

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


RightPocket,both,1,0.0,0.0,0.0,0.969991173874669,"{'hidden_layer_sizes': (16, 128)}"

RightPocket,both,1,30,37,32,29,42,0,0,0,0,0,0

[[5495    0]
 [ 170    0]]
RightPocket,both,2,0.15294117647058825,0.2047244094488189,0.30952380952380953,0.964342453662842,"{'hidden_layer_sizes': (32, 32)}"

RightPocket,both,2,30,26,26,27,35,21,11,6,9,2,9

[[5437   58]
 [ 144   26]]
RightPocket,both,3,0.0,0.0,0.0,0.9696381288614299,"{'hidden_layer_sizes': (8,)}"

RightPocket,both,3,29,30,38,36,38,0,0,1,0,0,0

[[5493    1]
 [ 171    0]]
RightPocket,both,4,0.0058823529411764705,0.011627906976744186,0.5,0.9699858757062146,"{'hidden_layer_sizes': (64, 128)}"

RightPocket,both,4,31,26,44,27,41,0,0,1,0,0,0

[[5493    1]
 [ 169    1]]
(28324, 36)
(28324,)
RightPocket,frequency,0,0.0,0.0,0.0,0.9696381288614299,"{'hidden_layer_sizes': (8, 32)}"

RightPocket,frequency,0,28,29,40,28,45,0,0,0,0,0,2

[[5493    2]
 [ 170    0]]
RightPocket,frequency,1,0.7705882352941177,0.1935007385524372,0.11064189189189189,0.807237

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


RightPocket,frequency,4,0.0,0.0,0.0,0.9699858757062146,"{'hidden_layer_sizes': (128, 128)}"

RightPocket,frequency,4,31,35,39,32,33,0,0,0,0,0,0

[[5494    0]
 [ 170    0]]
(28326, 72)
(28326,)


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


RightPocket,time,0,0.0,0.0,0.0,0.9698199788210378,"{'hidden_layer_sizes': (64, 16)}"

RightPocket,time,0,24,34,41,30,42,0,0,0,0,0,0

[[5495    0]
 [ 171    0]]
RightPocket,time,1,0.07058823529411765,0.12834224598930483,0.7058823529411765,0.9712268314210062,"{'hidden_layer_sizes': (16, 128)}"

RightPocket,time,1,28,28,28,30,44,0,0,1,3,0,1

[[5490    5]
 [ 158   12]]
RightPocket,time,2,0.27647058823529413,0.4,0.7230769230769231,0.9751103265666372,"{'hidden_layer_sizes': (128, 64)}"

RightPocket,time,2,24,25,26,17,31,7,0,0,1,8,2

[[5477   18]
 [ 123   47]]
RightPocket,time,3,0.29411764705882354,0.3787878787878788,0.5319148936170213,0.9710503089143866,"{'hidden_layer_sizes': (128, 32)}"

RightPocket,time,3,28,25,19,23,25,17,1,1,10,9,6

[[5451   44]
 [ 120   50]]
RightPocket,time,4,0.10588235294117647,0.18848167539267016,0.8571428571428571,0.9726390114739629,"{'hidden_layer_sizes': (32, 128)}"

RightPocket,time,4,29,28,40,26,29,0,0,0,0,2,1

[[5492    3]
 [ 152   18]]
(32113, 108)
(32113,)
W

In [7]:
len(classifier.best_estimator_.feature_importances_)

AttributeError: 'SVC' object has no attribute 'feature_importances_'