In [1]:
from lib.config import Config_f
from lib.data_set import Features
from lib.model import SimpleModel
import lib.utils

In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder, OneHotEncoder, normalize
from sklearn.decomposition import PCA, NMF
from sklearn.feature_selection import SelectKBest,f_classif,chi2,mutual_info_classif,VarianceThreshold,RFE,SelectFromModel
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import train_test_split,RandomizedSearchCV,PredefinedSplit,GridSearchCV
from sklearn import metrics
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

In [3]:
# Label tables: the column names of these CSVs are used below as recording file names.
df = pd.read_csv('./useful_data_label.csv', index_col=0)      # recordings with various labels
df2 = pd.read_csv('./unuseful_data_label.csv', index_col=0)   # recordings with only label 0
# A further list of label-0 recordings (not referenced again in the cells shown here).
df3 = pd.read_csv('./data/file_name.txt', header=None)

# Keep only the df2 columns whose second row (iloc[1]) is NaN.
# NOTE(review): presumably NaN there marks a recording with no event annotated -- confirm.
ind = df2.iloc[1].isna()

# Final file list: every labelled recording, followed by the selected label-0
# recordings, which are addressed under the 'normal/' sub-folder.
labelled_files = np.array(df.columns)
normal_files = np.array('normal/' + df2.columns[ind])
files = np.concatenate([labelled_files, normal_files])

In [4]:
# Override the base class of Config and Features for SVM Model
class SVM_Config(Config_f):
    """Configuration overrides for the SVM experiment.

    Only the attributes listed here differ from (or are added to) the
    ``Config_f`` base class; everything else is inherited unchanged.
    """

    NAME = 'SVM'
    NUM_CLASSES = 2

    # Signal pre-processing settings consumed by the lib.* pipeline.
    # NOTE(review): presumably a low-pass cutoff and a detrending strength --
    # confirm units/meaning in lib.config.
    FN_LP = 300
    DETREND_LAMBDA = 50

    # Held-out recordings; the same indices are used for fold 0 of the
    # PredefinedSplit built later in the notebook.
    TEST_FILES = files[[5,30,31,32,33,34,35]]

    # SVM parameters
    KERNEL = "rbf"
    GAMMA = "auto"
    C = 10
    # Previously assigned twice in this class body (both times None); kept once.
    CLASS_WEIGHTS = None
    
class SVM_dataset(Features):
    """Feature dataset used by the SVM experiment; keeps its config on the instance."""

    def __init__(self, config):
        # Base-class initialisation first, then keep a direct handle on the config.
        super().__init__(config)
        self.config = config

In [5]:
# Instantiate the SVM configuration and print its settings through display()
config = SVM_Config()
config.display()

# Build the feature dataset object from that configuration
data = SVM_dataset(config)


Configurations:
BINS                           3
C                              10
CHANNELS                       ['LEFT_TA', 'LEFT_TS', 'LEFT_BF', 'LEFT_RF', 'RIGHT_TA', 'RIGHT_TS', 'RIGHT_BF', 'RIGHT_RF']
CLASS_WEIGHTS                  None
DETREND_LAMBDA                 50
DROP_WITH_ZSCORE               None
FEATURES_LIST                  ['IEMG', 'SSI', 'WL', 'ZC', 'ku', 'SSC', 'skew', 'Acti', 'AR', 'HIST', 'MDF', 'MNF', 'mDWT']
FN_HP                          None
FN_IR                          False
FN_LP                          300
GAMMA                          auto
KERNEL                         rbf
LEVEL_DWT                      3
NAME                           SVM
NUM_CLASSES                    2
NUM_MF                         3
N_ENV                          20
RANGES                         (-3, 3)
RECT                           False
REMOVE_FREQS                   True
SAME_LABEL                     True
SCALE                          True
SHUFFLE                        

In [None]:
# Features to extract. Compared with the FEATURES_LIST shown in the config
# printout this adds 'MF', and num_mf is raised from the printed NUM_MF of 3 to 10.
data.feature_list = [
    'IEMG', 'SSI', 'WL', 'ZC', 'ku', 'SSC', 'skew',
    'Acti', 'AR', 'HIST', 'MF', 'MDF', 'MNF', 'mDWT',
]
data.num_mf = 10

# Read every recording in `files`, then compute the feature matrix.
data.load_data(files)
data.extract_features()

# Each split unpacks to three values, used below as features, labels and
# the source file name of every sample.
X_train, Y_train, F1 = data.train_set
X_valid, Y_valid, F2 = data.valid_set
X_test, Y_test, F3 = data.test_set

skip
skip
3/174: G06_FoG_trial_1_emg.csv
4/174: G06_FoG_trial_2_emg.csv
5/174: G06_FoG_trial_3_emg.csv
6/174: G07_Freezing_Trial1_trial_1_emg.csv
7/174: G08_FoG_1_trial_1_emg.csv
8/174: G08_FoG_2_trial_1_emg.csv
9/174: G11_FoG_trial_1_emg.csv
10/174: G11_FoG_trial_2_emg.csv
11/174: P379_M050_2_OFF_A_FoG_trial_1_emg.csv
12/174: P379_M050_2_OFF_A_FoG_trial_2_emg.csv
13/174: P379_M050_2_OFF_A_FoG_trial_3_emg.csv
14/174: P379_M050_2_OFF_B_FoG_trial_1_emg.csv
15/174: P379_M050_2_OFF_B_FoG_trial_2_emg.csv
16/174: P379_M050_2_OFF_B_FoG_trial_3_emg.csv
17/174: P551_M050_2_A_FoG_trial_1_emg.csv
18/174: P551_M050_2_B_FoG_trial_1_emg.csv
19/174: P551_M050_2_B_FoG_trial_2_emg.csv
20/174: P812_M050_2_B_FoG_trial_1_emg.csv
21/174: P812_M050_2_B_FoG_trial_2_emg.csv
22/174: normal/G02_Walking_trial_1_emg.csv
23/174: normal/G03_Walking_trial_1_emg.csv
24/174: normal/G03_Walking_trial_2_emg.csv
25/174: normal/G05_Walking_struct_fixed_trial_1_emg.csv
26/174: normal/G05_Walking_struct_fixed_trial_2_emg.cs

In [None]:
# Pool the three splits back into single arrays; the cross-validation folds
# are rebuilt later from the per-sample file names in F.
X = np.concatenate((X_train, X_valid, X_test))
Y = np.concatenate((Y_train, Y_valid, Y_test))
F = np.concatenate((F1, F2, F3))

In [10]:
def train_rnd(model,feature,y,binary=True):
    """Fit ``model`` on the samples whose label belongs to the chosen task.

    Parameters
    ----------
    model : object exposing ``fit(X, y)``
        Estimator or search object (e.g. ``RandomizedSearchCV``) to fit in place.
    feature : array-like, indexable by a boolean mask along its first axis
        One row of features per sample.
    y : array-like of int
        One label per sample.
    binary : bool, default True
        True  -> keep labels {0, 1, 2, 3, 4, 6} and collapse every non-zero
                 label to 1 (0 versus the rest).
        False -> keep labels {1, 2, 6} unchanged (multi-class).

    Returns
    -------
    None
        ``model`` is fitted as a side effect; ``y`` is not modified.
    """
    y = np.asarray(y)
    if binary:
        ind = np.isin(y, [0, 1, 2, 3, 4, 6])
        # Filter the labels with the same mask as the features. The targets
        # used to be left unfiltered here, so `feature[ind]` and the targets
        # disagreed in length as soon as `y` held a label outside the kept set.
        y_01 = y[ind].copy()
        y_01[y_01 != 0] = 1
    else:
        ind = np.isin(y, [1, 2, 6])
        y_01 = y[ind].copy()

    # Scaling and feature selection are expected to live inside `model`
    # (e.g. a Pipeline), so the raw features are passed straight through.
    model.fit(feature[ind], y_01)

#     eval_set=[(x_train,y_train),(x_test,y_test)]
#     model.fit(x_train, y_train,eval_metric=[metric],
#               eval_set=eval_set,early_stopping_rounds=30)


In [11]:
# Task selection: binary (label 0 versus the rest) or multi-class on labels {1, 2, 6}.
binary = True
if binary:
    ind = np.isin(Y, [0, 1, 2, 3, 4, 6])
    # NOTE(review): 'pr_auc' is not one of scikit-learn's predefined scorer
    # strings ('average_precision' is the closest built-in). It is only passed
    # to the GridSearchCV object built later, so confirm before fitting that search.
    score = 'pr_auc'
else:
    ind = np.isin(Y, [1, 2, 6])
    score = 'f1_macro'

# Recordings that form the held-out part of each cross-validation fold.
# Fold 0 uses the same indices as SVM_Config.TEST_FILES.
fold_files = [
    files[[5,30,31,32,33,34,35]],
    files[[6,50,51,52,53,54,55]],
    files[[7,40,41,42,43,44,45]],
]

# PredefinedSplit convention: -1 keeps a sample in the training part of every
# fold; k >= 0 places it in the test part of fold k.
test_fold = [-1]*len(F[ind])
for i, f in enumerate(F[ind]):
    for fold_id, names in enumerate(fold_files):
        if f in names:
            test_fold[i] = fold_id
ps = PredefinedSplit(test_fold=test_fold)

In [12]:
# Base SVM classifier; kernel, C and class weights come from the config.
# NOTE(review): gamma is hard-coded to 0.004 here although config.GAMMA ("auto")
# exists; the hyper-parameter search below overrides model__gamma in any case.
model = SVC(kernel=config.KERNEL,
          C=config.C,
          class_weight=config.CLASS_WEIGHTS,
          gamma=0.004)

#train,valid,test = train_model(model,feature,y)
#print([train,valid,test])
#player.Beep(1000,200)

In [None]:
# Fixed seed so that the randomized search (and the gradient-boosting selector)
# give the same result on every Restart & Run All.
SEARCH_SEED = 42

# Pre-processing step and candidate feature-selection steps.
scaler = StandardScaler()
pca = PCA(n_components=160,copy=True)
vt = VarianceThreshold(threshold=0.03)
sfm = SelectFromModel(GradientBoostingClassifier(random_state=SEARCH_SEED),max_features=80)
rfe = RFE(estimator=LogisticRegression(max_iter=10000), n_features_to_select=100)
method={'pca':pca,'vt':vt,'sfm':sfm,'rfe':rfe}

# One randomized search per selection method; results are appended to a text file.
for m in ['vt','sfm','rfe']:
    if m != 'vt':
        pipe = Pipeline([('scaler', scaler),
                         ('select', method[m]),
                         ('model', model)])
    else:
        # For 'vt' the selection step is left out, so this run is scaler + model only.
        pipe = Pipeline([('scaler', scaler),
                         ('model', model)])

    # Search space. Only C, gamma and (for the binary task) class_weight are
    # varied; every other SVC hyper-parameter keeps the value set on `model`.
    param_distribs = {'model__C': np.arange(1,50),
                      'model__gamma': ['auto','scale']}
    if m == 'pca':
        param_distribs['select__n_components'] = np.arange(80,190,20)
    elif m == 'rfe':
        param_distribs['select__n_features_to_select'] = np.arange(80,190,20)
    if binary:
        # Weight of class 1 relative to class 0, from 2 up to 10.
        param_distribs['model__class_weight'] = [{0:1,1:w} for w in range(2,11)]

    # No 'scoring' entry is given, so the search ranks candidates with the
    # pipeline's default score.
    search_params = {
        'estimator': pipe,
        'param_distributions': param_distribs,
        'cv': ps,
        'n_jobs': -1,
        'verbose': 32,
        'n_iter': 500,
        'random_state': SEARCH_SEED}
    rnd_search_cv = RandomizedSearchCV(**search_params)

    # Exhaustive counterpart over the same space; constructed but not fitted in this cell.
    grid_params = {
        'estimator': pipe,
        'param_grid': param_distribs,
        'cv': ps,
        'n_jobs': -1,
        'verbose': 32,
        'scoring': score,
        }
    grid_search_cv = GridSearchCV(**grid_params)

    train_rnd(rnd_search_cv,X[ind],Y[ind],binary)

    # Append "<method>:", the best parameters and the best CV score.
    with open('./svm_binary_st.txt','a') as f:
        f.write(m + ':\n'
                + str(rnd_search_cv.best_params_) + '\n\n'
                + str(rnd_search_cv.best_score_) + '\n\n')

Fitting 3 folds for each of 500 candidates, totalling 1500 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:    3.3s
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    3.4s
[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:    5.9s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:    6.1s
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    8.3s
[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:    8.6s
[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:   10.9s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:   11.0s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:   13.4s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:   13.6s
[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:   15.9s
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:   16.2s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:   18.4s
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:   18.6s
[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed:   

Fitting 3 folds for each of 500 candidates, totalling 1500 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  3.5min
[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:  3.5min
[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:  4.7min
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:  4.7min
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  5.8min
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:  5.8min
[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:  7.0min
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:  7.0min
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:  8.1min
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:  8.2min
[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed:  9

In [None]:
# Full cross-validation results of the most recently fitted search.
# rnd_search_cv is rebound on every iteration of the search loop, so after a
# complete run this is the last method in the loop ('rfe').
# model_temp=rnd_search_cv.best_estimator_
#test_model(model_temp,feature,y)
rnd_search_cv.cv_results_
# rnd_search_cv.best_params_

{'mean_fit_time': array([1.76756859, 1.40469289, 0.39165123, 2.0227085 , 1.17936389,
        0.30565969, 0.28494589, 2.45907942, 0.49606911, 2.02163061,
        0.97959312, 0.29782613, 0.83407084, 0.5312055 , 1.85495623,
        0.71291717, 0.95100896, 0.63267152, 0.50071534, 0.98681895,
        2.74561397, 0.42185497, 1.11710771, 1.3054498 , 0.67507148,
        0.62527402, 1.2017649 , 3.22322456, 0.45882789, 1.86303854,
        1.18943683, 1.54959154, 2.37070529, 3.1981794 , 3.22589008,
        2.07654834, 0.69844039, 2.72040399, 2.21034543, 0.46970026,
        1.21714711, 0.77016465, 1.61650189, 1.52797445, 2.40983168,
        1.91115959, 0.42826343, 1.85661395, 0.76103894, 1.40731549,
        2.991359  , 1.31164217, 1.78612177, 1.34575367, 2.80408462,
        1.23294632, 0.78034258, 0.86033869, 0.51128976, 1.69173129,
        0.84854658, 1.56291866, 2.95345187, 0.81213752, 0.55029853,
        1.8569502 , 0.54286742, 0.93109345, 2.27159723, 2.56522409,
        1.49887005, 0.51725833,

In [None]:
# Best hyper-parameter combination found by the most recently fitted search.
rnd_search_cv.best_params_

In [None]:
# Best hyper-parameter combination found by the most recently fitted search
# (this cell repeats the previous one; the output below belongs to this copy).
rnd_search_cv.best_params_

{'model__C': 25,
 'model__class_weight': {0: 1, 1: 2},
 'model__gamma': 'auto',
 'select__n_features_to_select': 140}

In [None]:
# Mean cross-validated score of the best candidate. No `scoring` was passed to
# RandomizedSearchCV, so this is the pipeline's default score.
rnd_search_cv.best_score_

In [None]:
# Mean cross-validated score of the best candidate
# (this cell repeats the previous one; the output below belongs to this copy).
rnd_search_cv.best_score_

0.8540979985793739

In [None]:
# Sanity check: per-feature variance after standardising the pooled feature matrix.
# Note that this re-fits the shared `scaler` object on all of X.
X_scaled = scaler.fit_transform(X)
np.var(X_scaled, axis=0)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1.])