In [1]:
from lib.config import Config_f
from lib.data_set import Features
from lib.model import SimpleModel
from lib import utils

In [2]:
import ctypes
import pandas as pd
import numpy as np
import pywt
import matplotlib.pyplot as plt

from sklearn.preprocessing import OneHotEncoder, StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest,f_classif,chi2,mutual_info_classif,VarianceThreshold,RFE,SelectFromModel
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from sklearn import metrics

import xgboost as xgb
from xgboost import plot_importance
from imblearn.over_sampling import SMOTE,BorderlineSMOTE

## Read file names

In [3]:
# Read the table whose columns name the recordings that carry various labels
df = pd.read_csv('./useful_data_label.csv',index_col=0)
# Read the table whose columns name the recordings that carry only label 0
df2 = pd.read_csv('./unuseful_data_label.csv',index_col=0)
# Read a list naming some of the label-0 recordings
df3 = pd.read_csv('./data/file_name.txt',header=None)
# ctypes.windll is only defined on Windows; fall back to None elsewhere so
# this cell does not raise AttributeError on Linux/macOS kernels.
player = ctypes.windll.kernel32 if hasattr(ctypes, 'windll') else None

# Boolean mask over df2's columns: True where the second row (iloc[1]) is NaN.
# NOTE(review): presumably a NaN there marks a recording to keep -- confirm.
ind = df2.iloc[1].isna()
# Final file list: every column name of df, followed by the selected df2
# column names prefixed with the 'normal/' sub-directory.
files = np.concatenate([np.array(df.columns),np.array('normal/'+df2.columns[ind])])

## Configuration

In [20]:
# Subclass the base Config_f and Features classes for the XGBoost model
class XGB_Config(Config_f):
    """Configuration overrides used for the XGBoost experiment.

    Only the attributes listed here are overridden; everything else is
    inherited from ``Config_f``.
    """

    # Experiment identity and target definition
    NAME = 'XGBoost'
    NUM_CLASSES = 2
    CLASS_WEIGHTS = None

    # Data / pre-processing options (their semantics are defined by Config_f)
    FN_LP = 300
    FN_HP = None
    FN_IR = False
    DETREND_LAMBDA = 50
    DROP_WITH_ZSCORE = None
    REMOVE_FREQS = True
    # Entries of the module-level `files` array at positions 6 and 30..35
    TEST_FILES = files[[6, *range(30, 36)]]
    SHUFFLE = True

    # XGBoost parameters
    N_EST = 500
    MAX_DEPTH = 3
    LR = 0.1
    SUBSAMPLE = 0.9
    REG_LAMBDA = 25
    
class XGB_dataset(Features):
    """``Features`` data set that also keeps a reference to its configuration."""

    def __init__(self, config):
        """Initialise the ``Features`` base with ``config`` and store it on the instance."""
        super().__init__(config)
        self.config = config
        

In [21]:
# Generate XGBoost configuration
# (instantiate the overrides defined in XGB_Config, then print the resulting
# settings via the inherited display() method)
config = XGB_Config()
config.display()


Configurations:
BINS                           3
CHANNELS                       ['LEFT_TA', 'LEFT_TS', 'LEFT_BF', 'LEFT_RF', 'RIGHT_TA', 'RIGHT_TS', 'RIGHT_BF', 'RIGHT_RF']
CLASS_WEIGHTS                  None
DETREND_LAMBDA                 50
DROP_WITH_ZSCORE               None
FEATURES_LIST                  ['IEMG', 'SSI', 'WL', 'ZC', 'ku', 'SSC', 'skew', 'Acti', 'AR', 'HIST', 'MDF', 'MNF', 'mDWT']
FN_HP                          None
FN_IR                          False
FN_LP                          300
LEVEL_DWT                      3
LR                             0.1
MAX_DEPTH                      3
NAME                           XGBoost
NUM_CLASSES                    2
NUM_MF                         3
N_ENV                          20
N_EST                          500
RANGES                         (-3, 3)
RECT                           False
REG_LAMBDA                     25
REMOVE_FREQS                   True
SAME_LABEL                     True
SAVE                           

## Data generation

In [22]:
# Create the feature data set object from the XGBoost configuration
data = XGB_dataset(config)

In [23]:
# Choose features to use
# (same names as the FEATURES_LIST shown in the configuration printout)
data.feature_list = ['IEMG', 'SSI', 'WL', 'ZC', 'ku', 'SSC', 'skew', 'Acti', 'AR', 'HIST', 'MDF', 'MNF', 'mDWT']

# Load data from files
# (prints one progress line per file, as seen in the cell output)
data.load_data(files)

# Extract features from data
data.extract_features()

# Each split is unpacked as a 3-tuple.
# NOTE(review): presumably (features, labels, third per-split array);
# the meaning of F1/F2/F3 is defined in lib.data_set -- confirm.
X_train,Y_train,F1 = data.train_set
X_valid,Y_valid,F2 = data.valid_set
X_test, Y_test, F3 = data.test_set

skip
skip
3/174: G06_FoG_trial_1_emg.csv
4/174: G06_FoG_trial_2_emg.csv
5/174: G06_FoG_trial_3_emg.csv
6/174: G07_Freezing_Trial1_trial_1_emg.csv
7/174: G08_FoG_1_trial_1_emg.csv
8/174: G08_FoG_2_trial_1_emg.csv
9/174: G11_FoG_trial_1_emg.csv
10/174: G11_FoG_trial_2_emg.csv
11/174: P379_M050_2_OFF_A_FoG_trial_1_emg.csv
12/174: P379_M050_2_OFF_A_FoG_trial_2_emg.csv
13/174: P379_M050_2_OFF_A_FoG_trial_3_emg.csv
14/174: P379_M050_2_OFF_B_FoG_trial_1_emg.csv
15/174: P379_M050_2_OFF_B_FoG_trial_2_emg.csv
16/174: P379_M050_2_OFF_B_FoG_trial_3_emg.csv
17/174: P551_M050_2_A_FoG_trial_1_emg.csv
18/174: P551_M050_2_B_FoG_trial_1_emg.csv
19/174: P551_M050_2_B_FoG_trial_2_emg.csv
20/174: P812_M050_2_B_FoG_trial_1_emg.csv
21/174: P812_M050_2_B_FoG_trial_2_emg.csv
22/174: normal/G02_Walking_trial_1_emg.csv
23/174: normal/G03_Walking_trial_1_emg.csv
24/174: normal/G03_Walking_trial_2_emg.csv
25/174: normal/G05_Walking_struct_fixed_trial_1_emg.csv
26/174: normal/G05_Walking_struct_fixed_trial_2_emg.cs

171/174: normal/P940_MSham_A_Walking_trial_6_emg.csv
172/174: normal/P940_MSham_B_Walking_trial_2_emg.csv
173/174: normal/P940_MSham_B_Walking_trial_4_emg.csv
174/174: normal/P940_MSham_B_Walking_trial_6_emg.csv
threshold_WAMP:1.0, threshold_ZC:0.0, threshold_SSC:0.0, bins:3, ranges:(-3,3), num_mf:3, wavelet: db7, level: 3
['IEMG', 'SSI', 'WL', 'ZC', 'ku', 'SSC', 'skew', 'Acti', 'AR', 'HIST', 'MDF', 'MNF', 'mDWT']
threshold_WAMP:1.0, threshold_ZC:0.0, threshold_SSC:0.0, bins:3, ranges:(-3,3), num_mf:3, wavelet: db7, level: 3
['IEMG', 'SSI', 'WL', 'ZC', 'ku', 'SSC', 'skew', 'Acti', 'AR', 'HIST', 'MDF', 'MNF', 'mDWT']
threshold_WAMP:1.0, threshold_ZC:0.0, threshold_SSC:0.0, bins:3, ranges:(-3,3), num_mf:3, wavelet: db7, level: 3
['IEMG', 'SSI', 'WL', 'ZC', 'ku', 'SSC', 'skew', 'Acti', 'AR', 'HIST', 'MDF', 'MNF', 'mDWT']


In [24]:
# Pool the train / validation / test pieces back into one array per kind
X, Y, F = (
    np.concatenate(parts)
    for parts in (
        (X_train, X_valid, X_test),
        (Y_train, Y_valid, Y_test),
        (F1, F2, F3),
    )
)

## Model

In [25]:
# Override the SimpleModel base class for XGBoost
class XGB_Model(SimpleModel):
    """XGBoost classifier exposed through the project's ``SimpleModel`` interface.

    The estimator returned by ``build`` is used in ``train``/``predict`` as
    ``self.simple_model`` and the configuration as ``self.config``.
    NOTE(review): both attributes are presumably set by ``SimpleModel.__init__``,
    which is not visible here -- confirm.
    """

    def build(self,config):
        """Create the ``xgb.XGBClassifier`` described by ``config``.

        Parameters
        ----------
        config : object
            Must provide NUM_CLASSES, MAX_DEPTH, LR, N_EST, SUBSAMPLE,
            REG_LAMBDA and CLASS_WEIGHTS.  An optional ``PARA`` dict supplies
            extra keyword arguments for the classifier.

        Returns
        -------
        xgb.XGBClassifier
            An unfitted classifier: multi-class soft-max when
            ``NUM_CLASSES > 2``, otherwise binary logistic with
            ``scale_pos_weight`` taken from ``CLASS_WEIGHTS``.
        """
        # PARA is not defined on XGB_Config itself; tolerate its absence
        # (or None) instead of raising AttributeError.
        extra_params = dict(getattr(config, 'PARA', None) or {})

        # Settings shared by the binary and the multi-class estimator
        shared_params = dict(max_depth=config.MAX_DEPTH,
                             learning_rate=config.LR,
                             n_estimators=config.N_EST,
                             subsample=config.SUBSAMPLE,
                             reg_lambda=config.REG_LAMBDA,
                             #reg_alpha = 3,
                             )

        if config.NUM_CLASSES > 2:
            # The keyword is `num_class`; the former spelling `num_calss`
            # was an unknown parameter and never had the intended effect.
            model = xgb.XGBClassifier(objective='multi:softmax',
                                      num_class=config.NUM_CLASSES,
                                      **shared_params,
                                      **extra_params
                                     )
        else:
            model = xgb.XGBClassifier(objective='binary:logistic',
                                      scale_pos_weight=config.CLASS_WEIGHTS,
                                      **shared_params,
                                      **extra_params
                                     )

        if config.CLASS_WEIGHTS is not None:
            print('Using class weights: 1:',config.CLASS_WEIGHTS)

        return model

    def train(self, train_dataset, val_dataset, transformer=None, **kwargs):
        """Fit the scaler, the optional transformer and the classifier.

        Parameters
        ----------
        train_dataset, val_dataset : sequence
            ``(features, labels)`` pairs; index 0 is used as the feature
            matrix and index 1 as the label vector.
        transformer : object, optional
            Feature transformer exposing ``fit(X, y)`` and ``transform(X)``.
            It is fitted on the scaled training features only.
        **kwargs
            Forwarded to the classifier's ``fit``.
        """
        self.X_train = train_dataset[0]
        # Scale to [0, 1] using statistics of the training split only;
        # the validation split is transformed with the fitted scaler.
        self.scaler = MinMaxScaler((0,1))
        X_train = self.scaler.fit_transform(train_dataset[0])
        X_val = self.scaler.transform(val_dataset[0])

        if transformer is not None:
            self.transformer = transformer
            self.transformer.fit(X_train,train_dataset[1])
            X_train = self.transformer.transform(X_train)
            X_val = self.transformer.transform(X_val)
        else:
            self.transformer = None

        # Classification error; the multi-class variant is named 'merror'
        metric = 'merror' if self.config.NUM_CLASSES > 2 else 'error'

        # The validation pair comes last in eval_set; the training log shows
        # early stopping monitoring 'validation_1', i.e. this last entry.
        eval_set=[(X_train,train_dataset[1]),(X_val,val_dataset[1])]
        self.simple_model.fit(X_train, train_dataset[1],eval_metric=[metric],
                              eval_set=eval_set,
                              early_stopping_rounds=30,
                              **kwargs)

    def predict(self, data):
        """Return class predictions for ``data``.

        ``data`` is converted to an array, scaled with the scaler fitted in
        ``train`` and passed through the transformer (if one was used)
        before the classifier predicts.  ``train`` must have been called
        first, because it creates ``self.scaler`` and ``self.transformer``.
        """
        data = np.array(data)
        X = self.scaler.transform(data)
        if self.transformer is not None:
            X = self.transformer.transform(X)
        results = self.simple_model.predict(X)

        return results

    def model_metrics(self,data,label):
        """Evaluate predictions for ``data`` against ``label``.

        Returns
        -------
        tuple
            ``(accuracy, confusion_matrix, macro_f1)``.
        """
        pred = self.predict(data)
        acc = metrics.accuracy_score(label,pred)
        cm = metrics.confusion_matrix(label,pred)
        f1 = metrics.f1_score(label,pred,average='macro')
        return acc,cm,f1

## Data split

In [26]:
# Split and prepare the data for the model.
# NOTE(review): the meaning of the class ids and of the `binary` switch is
# defined by utils.data_split -- confirm there.
class_id = [1, 2, 6]
binary = True

(x_train, y_train,
 x_valid, y_valid,
 x_test, y_test) = utils.data_split(
    (X_train, X_valid, X_test),
    (Y_train, Y_valid, Y_test),
    class_id,
    binary,
    random_state=555,
)

## Model training

In [27]:
# Hyper-parameter overrides for the selected task
if binary:
    # binary task -- used together with PCA (140 components)
    overrides = {
        'CLASS_WEIGHTS': 9,
        'NUM_CLASSES': 2,
        'LR': 0.1,
        'MAX_DEPTH': 3,
        'SUBSAMPLE': 0.5,
        'REG_LAMBDA': 27,
        'PARA': {'colsample_bytree': 0.5},
    }
else:
    # multi-class task -- used together with SelectFromModel (sfm)
    overrides = {
        'CLASS_WEIGHTS': 1,
        'NUM_CLASSES': len(class_id),
        'LR': 0.3,
        'MAX_DEPTH': 4,
        'SUBSAMPLE': 0.5,
        'REG_LAMBDA': 25,
        'PARA': {'colsample_bytree': 0.9},
    }

# Number of boosting rounds is the same for both tasks
overrides['N_EST'] = 300

for attr_name, attr_value in overrides.items():
    setattr(config, attr_name, attr_value)

# Generate XGBoost Model
xgb_model = XGB_Model('XGB', config, './model/XGB/')

Using class weights: 1: 9


In [28]:
# Candidate feature transformers; only one of them is handed to the model
pca = PCA(n_components=140, copy=True)
sfm = SelectFromModel(GradientBoostingClassifier(), max_features=120)
rfe = RFE(estimator=LogisticRegression(max_iter=10000), n_features_to_select=80)
vt = VarianceThreshold(threshold=0.01)

# PCA for the binary task, model-based selection otherwise
trans = pca if binary else sfm

xgb_model.train((x_train, y_train), (x_valid, y_valid), transformer=trans)

[0]	validation_0-error:0.09694	validation_1-error:0.12699
Multiple eval metrics have been passed: 'validation_1-error' will be used for early stopping.

Will train until validation_1-error hasn't improved in 30 rounds.
[1]	validation_0-error:0.10225	validation_1-error:0.12919
[2]	validation_0-error:0.10262	validation_1-error:0.12095
[3]	validation_0-error:0.08979	validation_1-error:0.10885
[4]	validation_0-error:0.09034	validation_1-error:0.11160
[5]	validation_0-error:0.07770	validation_1-error:0.09731
[6]	validation_0-error:0.07403	validation_1-error:0.09621
[7]	validation_0-error:0.06542	validation_1-error:0.08961
[8]	validation_0-error:0.06249	validation_1-error:0.08026
[9]	validation_0-error:0.05937	validation_1-error:0.07642
[10]	validation_0-error:0.06230	validation_1-error:0.08081
[11]	validation_0-error:0.06432	validation_1-error:0.08411
[12]	validation_0-error:0.05901	validation_1-error:0.07587
[13]	validation_0-error:0.06176	validation_1-error:0.07862
[14]	validation_0-error

[136]	validation_0-error:0.01338	validation_1-error:0.02969
[137]	validation_0-error:0.01338	validation_1-error:0.02969
[138]	validation_0-error:0.01301	validation_1-error:0.02859
Stopping. Best iteration:
[108]	validation_0-error:0.01723	validation_1-error:0.02859



## Model evaluation

In [29]:
# Accuracy, confusion matrix and macro-F1 for each split
acc_train, cm_train, f1_train = xgb_model.model_metrics(x_train, y_train)
acc_valid, cm_valid, f1_valid = xgb_model.model_metrics(x_valid, y_valid)
acc_test, cm_test, f1_test = xgb_model.model_metrics(x_test, y_test)

# (format string, accuracy, f1, confusion matrix, extra trailing print args)
split_reports = (
    ('acc_train: %f\nf1_train: %f\nconfusion_matrix:\n', acc_train, f1_train, cm_train, ('\n',)),
    ('acc_valid: %f\nf1_valid: %f\nconfusion_matrix:\n', acc_valid, f1_valid, cm_valid, ('\n',)),
    ('acc_test: %f\nf1_test: %f\nconfusion_matrix:\n', acc_test, f1_test, cm_test, ()),
)
for template, acc_value, f1_value, conf_matrix, trailer in split_reports:
    print(template % (acc_value, f1_value), conf_matrix, *trailer)

acc_train: 0.982774
f1_train: 0.944320
confusion_matrix:
 [[4949   92]
 [   2  414]] 

acc_valid: 0.971413
f1_valid: 0.920630
confusion_matrix:
 [[1611   39]
 [  13  156]] 

acc_test: 0.906250
f1_test: 0.897917
confusion_matrix:
 [[267  32]
 [ 10 139]]
