## PIR89 — aggregation windows: ['30 min', '1 h', '3 h']

# Per-signal aggregation functions.
# Accelerometer axes get spread and skewness only; the physiological signals
# additionally get the mean.
# NOTE(review): `skew` is not imported in the visible cells — presumably
# scipy.stats.skew; confirm.
agg = {
    **{axis: [np.std, skew] for axis in ('acc_x', 'acc_y', 'acc_z')},
    **{
        signal: [np.mean, np.std, skew]
        for signal in ('heartrate', 'conductance', 'temperature', 'bvp')
    },
}

In [38]:
import datetime
import logging
import numpy as np
import pandas as pd
import sklearn.metrics
import xgboost

from ds_utils import aggregators
from ds_utils import e4

def test_xgb(df: pd.DataFrame, model: xgboost.XGBClassifier) -> np.ndarray:
    """Make predictions from a trained xgboost model on aggregated data.

    Prints the accuracy of the predictions against the ``craving_onset``
    labels and returns the predictions unchanged.

    Args:
        df (pd.DataFrame): test dataframe. Must contain the ``craving_onset``
            label column (True/False) and the ``_window_end`` column; both are
            excluded from the feature matrix.
        model (xgboost.XGBClassifier): trained model
    Returns:
        np.ndarray: the predicted values, exactly as returned by
            ``model.predict``.
    """
    # Features are passed positionally (``.values``), so the remaining column
    # order must match the order the model was trained on.
    features = df.drop(columns=['craving_onset', '_window_end']).values
    labels = df['craving_onset'].map({True: 1, False: 0}).values

    Y_pred = model.predict(features)

    accuracy = sklearn.metrics.accuracy_score(labels, Y_pred)
    print("Accuracy: %.2f%%" % (accuracy * 100.0))
    return Y_pred
    

In [39]:
# Read the four aggregated partitions: parts 1-3 are concatenated for training
# further down, part 4 is passed to test_xgb as the evaluation set.
df1, df2, df3, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./data/agg_data89p1.csv",
        "./data/agg_data89p2.csv",
        "./data/agg_data89p3.csv",
        "./data/agg_data89p4.csv",
    )
)

In [40]:
# Stack the three training partitions row-wise into a single frame.
df_list = [df1, df2, df3]
data = pd.concat(objs=df_list, axis="index")

In [41]:
# Remove the window-end column so it is not fed to the model as a feature.
# errors='ignore' keeps this cell re-runnable: once `data` has been reassigned
# without the column, a second execution would otherwise raise KeyError.
data = data.drop(columns=['_window_end'], errors='ignore')

In [42]:
# Feature matrix: every remaining column except the label.
X = data.drop(columns=['craving_onset']).values
# Label vector: encode the boolean label as 1/0, the same mapping test_xgb
# applies to the test set.
Y = data['craving_onset'].map({True: 1, False: 0}).values

In [49]:
# Train a classifier with the library's default hyperparameters.
# NOTE(review): the recorded output of this cell lists non-default settings
# (e.g. colsample_bytree=0.3, subsample=0.9, earlyStoppingRounds=8) that this
# call never sets — confirm the output was produced by this code.
model = xgboost.XGBClassifier()
model.fit(X=X, y=Y)

Parameters: { earlyStoppingRounds, learningRate, maxDepth, numIters, testRatio } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=0.3, earlyStoppingRounds=8,
              gamma=0, gpu_id=-1, importance_type='gain',
              interaction_constraints='', learningRate=0.15,
              learning_rate=0.300000012, maxDepth=9, max_delta_step=0,
              max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=8,
              numIters=100, num_parallel_tree=1, random_state=0, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=1, subsample=0.9, testRatio=0.2,
              tree_method='exact', validate_parameters=1, ...)

In [51]:
pred = test_xgb(test, model)

Accuracy: 49.93%


In [48]:
model.feature_importances_

array([0.00068919, 0.00125908, 0.00096237, 0.00036883, 0.00117588,
       0.0005808 , 0.00108941, 0.01181771, 0.0021724 , 0.00429166,
       0.00944238, 0.00817777, 0.00301394, 0.0029753 , 0.00435416,
       0.00180719, 0.01184864, 0.01160891, 0.00366415, 0.01129359,
       0.02594897, 0.04727761, 0.00656796, 0.01311824, 0.0039641 ,
       0.06901521, 0.00726602, 0.01963148, 0.00599036, 0.0031177 ,
       0.05365263, 0.00436555, 0.00637273, 0.02147398, 0.01146913,
       0.00897142, 0.02706167, 0.01074139, 0.0101092 , 0.02977097,
       0.02311781, 0.00458653, 0.03369442, 0.01633362, 0.03912577,
       0.00510528, 0.06699607, 0.01143702, 0.00744526, 0.01824276,
       0.03359954, 0.01657909, 0.01541454, 0.00298442, 0.09677815,
       0.01118011, 0.00602092, 0.00977504, 0.059619  , 0.03755657,
       0.00592839], dtype=float32)

In [47]:
data.columns

Index(['acc_x', 'acc_y', 'acc_z', 'bvp', 'conductance', 'heartrate',
       'temperature', 'craving_onset', 'acc_x_30min__std', 'acc_x_30min__skew',
       'acc_y_30min__std', 'acc_y_30min__skew', 'acc_z_30min__std',
       'acc_z_30min__skew', 'heartrate_30min__mean', 'heartrate_30min__std',
       'heartrate_30min__skew', 'conductance_30min__mean',
       'conductance_30min__std', 'conductance_30min__skew',
       'temperature_30min__mean', 'temperature_30min__std',
       'temperature_30min__skew', 'bvp_30min__mean', 'bvp_30min__std',
       'bvp_30min__skew', 'acc_x_1h__std', 'acc_x_1h__skew', 'acc_y_1h__std',
       'acc_y_1h__skew', 'acc_z_1h__std', 'acc_z_1h__skew',
       'heartrate_1h__mean', 'heartrate_1h__std', 'heartrate_1h__skew',
       'conductance_1h__mean', 'conductance_1h__std', 'conductance_1h__skew',
       'temperature_1h__mean', 'temperature_1h__std', 'temperature_1h__skew',
       'bvp_1h__mean', 'bvp_1h__std', 'bvp_1h__skew', 'acc_x_3h__std',
       'acc_x_3h__s