In [41]:
import pandas as pd
import numpy as np
import os
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

In [58]:
# Per-patient activity exports; each lives at data/patient_<n>/patient_<n>_activity.csv.
all_activity_data = [
    f'data/patient_{patient_id}/patient_{patient_id}_activity.csv'
    for patient_id in (1, 2, 3)
]
# Path of the separate test CSV.
test_data = 'data/test/testdata/test_data.csv'

In [51]:
# Read every activity CSV listed in `all_activity_data` into its own frame
# (first row is the header, no column is used as the index).
li = [
    pd.read_csv(csv_path, index_col=None, header=0)
    for csv_path in all_activity_data
]

# Columns kept for the rest of the notebook: the date plus six score columns.
selected_columns = [
    'summary_date',
    'score_stay_active',
    'score_move_every_hour',
    'score_meet_daily_targets',
    'score_training_frequency',
    'score_training_volume',
    'score_recovery_time',
]

# Stack the frames row-wise with a fresh 0..n-1 index, then keep only the
# selected columns in the order given above.
data = pd.concat(li, axis=0, ignore_index=True).loc[:, selected_columns]

In [52]:
def get_weekday_end(weekday):
    """Label a numeric day index as 'weekend' or 'weekday'.

    Args:
        weekday: Day index to classify.

    Returns:
        'weekend' if ``weekday`` is 5, 6 or 7; 'weekday' for any other value.

    Note:
        The caller in this notebook passes ``Timestamp.weekday()``, which
        ranges over 0-6, so the value 7 is accepted but not produced there.
    """
    return 'weekend' if weekday in (5, 6, 7) else 'weekday'
    
# Parse the date strings, then label each row 'weekday' or 'weekend' from the
# parsed value's .weekday() index.
summary_dates = pd.to_datetime(data['summary_date'])
data['weekday'] = summary_dates.map(lambda day: get_weekday_end(day.weekday()))



In [53]:
# Display the combined frame, now including the derived 'weekday' column.
data

Unnamed: 0,summary_date,score_stay_active,score_move_every_hour,score_meet_daily_targets,score_training_frequency,score_training_volume,score_recovery_time,weekday
0,2019-02-06,100.0,100.0,95.0,100.0,100.0,98.0,weekday
1,2019-02-07,70.0,100.0,100.0,100.0,100.0,89.0,weekday
2,2019-02-08,88.0,100.0,100.0,100.0,99.0,100.0,weekday
3,2019-02-09,84.0,100.0,100.0,100.0,99.0,100.0,weekend
4,2019-02-10,87.0,100.0,100.0,100.0,98.0,98.0,weekend
...,...,...,...,...,...,...,...,...
2789,2022-10-31,48.0,78.0,60.0,100.0,97.0,98.0,weekday
2790,2022-11-01,72.0,100.0,43.0,100.0,96.0,100.0,weekday
2791,2022-11-02,78.0,95.0,43.0,100.0,96.0,100.0,weekday
2792,2022-11-03,73.0,100.0,25.0,100.0,96.0,100.0,weekday


In [54]:
# Replace the 'weekday'/'weekend' strings with integer class labels, overwriting
# the column in place.
label_encoder = LabelEncoder()
data['weekday'] = label_encoder.fit_transform(data['weekday'])

# Two-step estimator: standardise the features, then fit a logistic-regression
# classifier on the scaled values.
pipeline = Pipeline(steps=[
    ('normalizer', StandardScaler()),  # step 1 - normalize data
    ('clf', LogisticRegression()),     # step 2 - classifier
])
pipeline.steps

[('normalizer', StandardScaler()), ('clf', LogisticRegression())]

In [55]:
# Features: every column except the first and the last, taken by position,
# as a NumPy array.
X_train = data.iloc[:, 1:-1].to_numpy()
# Target: the 'weekday' column, kept as a pandas Series.
y_train = data['weekday']

In [56]:
# Display the feature matrix that will be passed to the pipeline.
X_train

array([[100., 100.,  95., 100., 100.,  98.],
       [ 70., 100., 100., 100., 100.,  89.],
       [ 88., 100., 100., 100.,  99., 100.],
       ...,
       [ 78.,  95.,  43., 100.,  96., 100.],
       [ 73., 100.,  25., 100.,  96., 100.],
       [ 78., 100.,  25.,  96.,  79., 100.]])

In [None]:
# Fit the pipeline (scaler, then classifier) on the training features and labels.
pipeline.fit(X=X_train, y=y_train)