## Multitaper method for feature extraction

Think-count


In [1]:
#Import necessary libraries

import pandas as pd
import numpy as np
from scipy import signal
import os
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import pickle as pkl
import itertools 
import glob
from sklearn import svm 
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, plot_confusion_matrix
from sklearn.decomposition import PCA
from spectrum import data_cosine, dpss, pmtm


# %matplotlib inline 
%matplotlib qt


In [5]:
#Importing raw data files 

#Path to the raw recording exported as .csv
csvpath = "C:/Users/Wu Di/Documents/EEG-analysis/200108-Readings-csv/thinking-counting-switching.csv"

#Only file columns 4..35 (32 columns) are read
#NOTE(review): presumably these are the 32 EEG channels - confirm against the recording layout
cols_to_use = list(range(4, 36))

#Raw dataframes - each channel is a column (the file has no header row)
raw_df = pd.read_csv(csvpath, header=None, usecols=cols_to_use)

In [6]:
#Bandpass (BP) filter helper functions

#Creates butterworth BP filter
def butter_bandpass(lowcut, highcut, fs, order=5):
    """Design a digital Butterworth band-pass filter.

    lowcut, highcut: pass-band edges, in the same units as fs.
    fs: sampling frequency.
    order: order handed to scipy.signal.butter.
    Returns the (b, a) coefficient arrays produced by scipy.signal.butter.
    """
    nyquist = fs * 0.5
    # Digital cutoffs are expressed as a fraction of the Nyquist frequency
    band_edges = [lowcut / nyquist, highcut / nyquist]
    return signal.butter(order, band_edges, btype="bandpass")

def butter_lowpass(cutFreq,fs,order=5):
    """Design a digital Butterworth low-pass filter.

    cutFreq: cutoff frequency, in the same units as fs.
    fs: sampling frequency.
    order: order handed to scipy.signal.butter.
    Returns the (b, a) coefficient arrays.
    """
    # Normalise the cutoff to the Nyquist frequency (half the sampling rate)
    normalized_cut = cutFreq / (0.5 * fs)
    coeffs = signal.butter(order, normalized_cut, btype="lowpass")
    return coeffs

def butter_highpass(cutFreq,fs,order=5):
    """Design a digital Butterworth high-pass filter.

    cutFreq: cutoff frequency, in the same units as fs.
    fs: sampling frequency.
    order: order handed to scipy.signal.butter.
    Returns the (b, a) coefficient arrays.
    """
    # Normalise the cutoff to the Nyquist frequency (half the sampling rate)
    normalized_cut = cutFreq / (0.5 * fs)
    coeffs = signal.butter(order, normalized_cut, btype="highpass")
    return coeffs

#Applies butterworth BP filter
def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    """Band-pass filter `data` between lowcut and highcut.

    The coefficients come from butter_bandpass and are applied with
    scipy.signal.filtfilt (forward-backward filtering) rather than lfilter.
    Returns the filtered data.
    """
    numer, denom = butter_bandpass(lowcut, highcut, fs, order=order)
    return signal.filtfilt(numer, denom, data)

#Applies butterworth lowpass filter
def butter_lowpass_filter(data, cutFreq, fs, order=5):
    """Low-pass filter `data` below cutFreq.

    data: 1-D sequence of samples.
    cutFreq: cutoff frequency, in the same units as fs.
    fs: sampling frequency.
    order: Butterworth order used for the design (default 5).
    Returns the filtered data from scipy.signal.filtfilt.
    """
    # Forward the requested order to the design step so the argument is honoured.
    # NOTE(review): large orders in (b, a) form can be numerically
    # ill-conditioned (see the scipy.signal.butter notes) - check any caller
    # that asks for a high order.
    b, a = butter_lowpass(cutFreq, fs, order=order)
    # Forward-backward filtering, consistent with the band-pass helper
    filtered_data = signal.filtfilt(b, a, data)
    return filtered_data

#Applies butterworth highpass filter
def butter_highpass_filter(data, cutFreq, fs, order=5):
    """High-pass filter `data` above cutFreq.

    data: 1-D sequence of samples.
    cutFreq: cutoff frequency, in the same units as fs.
    fs: sampling frequency.
    order: Butterworth order used for the design (default 5).
    Returns the filtered data from scipy.signal.filtfilt.
    """
    # Forward the requested order to the design step so the argument is honoured.
    # NOTE(review): large orders in (b, a) form can be numerically
    # ill-conditioned (see the scipy.signal.butter notes) - check any caller
    # that asks for a high order.
    b, a = butter_highpass(cutFreq, fs, order=order)
    # Forward-backward filtering, consistent with the band-pass helper
    filtered_data = signal.filtfilt(b, a, data)
    return filtered_data

#Applies butterworth BP filter to Pandas dataframe 
def bp_filter_df(df, lowcut, highcut, fs, order):
    """Band-pass filter every column of `df` independently.

    Returns a new DataFrame with the same number of rows and columns as `df`,
    whose row and column labels both start at 1.
    """
    n_rows, n_cols = df.shape
    # Output frame is pre-labelled 1..n_rows by 1..n_cols
    filtered = pd.DataFrame(index=range(1, n_rows + 1),
                            columns=range(1, n_cols + 1))
    for pos in range(n_cols):
        # Column at position pos is filtered and stored under label pos + 1
        channel = df.iloc[:, pos].values
        filtered[pos + 1] = butter_bandpass_filter(
            channel, lowcut, highcut, fs, order)
    return filtered

#Applies butterworth lowpass filter to Pandas dataframe 
def lp_filter_df(df, cutFreq, fs, order):
    """Low-pass filter every column of `df` independently.

    Returns a new DataFrame with the same number of rows and columns as `df`,
    whose row and column labels both start at 1.
    """
    n_rows, n_cols = df.shape
    # Output frame is pre-labelled 1..n_rows by 1..n_cols
    filtered = pd.DataFrame(index=range(1, n_rows + 1),
                            columns=range(1, n_cols + 1))
    for pos in range(n_cols):
        # Column at position pos is low-pass filtered and stored under label pos + 1
        channel = df.iloc[:, pos].values
        filtered[pos + 1] = butter_lowpass_filter(channel, cutFreq, fs, order)
    return filtered

#Applies butterworth highpass filter to Pandas dataframe 
def hp_filter_df(df, cutFreq, fs, order):
    """High-pass filter every column of `df` independently.

    Returns a new DataFrame with the same number of rows and columns as `df`,
    whose row and column labels both start at 1.
    """
    n_rows, n_cols = df.shape
    # Output frame is pre-labelled 1..n_rows by 1..n_cols
    filtered = pd.DataFrame(index=range(1, n_rows + 1),
                            columns=range(1, n_cols + 1))
    for pos in range(n_cols):
        # Column at position pos is high-pass filtered and stored under label pos + 1
        channel = df.iloc[:, pos].values
        filtered[pos + 1] = butter_highpass_filter(channel, cutFreq, fs, order)
    return filtered

In [7]:
#Apply BP filtering to raw dataframes
def filt_freq_bands(df,fs):
    """Filter `df` into five frequency bands.

    Returns a list of DataFrames in the fixed order
    [delta, theta, alpha, beta, gamma]. Neighbouring bands overlap by 1 Hz
    at their edges.
    """
    bands = [
        lp_filter_df(df, 4.5, fs, 10),         # delta: below 4.5
        bp_filter_df(df, 3.5, 8.5, fs, 6),     # theta: 3.5 - 8.5
        bp_filter_df(df, 7.5, 12.5, fs, 8),    # alpha: 7.5 - 12.5
        bp_filter_df(df, 11.5, 30.5, fs, 16),  # beta: 11.5 - 30.5
        hp_filter_df(df, 29.5, fs, 50),        # gamma: above 29.5
    ]
    return bands

# Sampling frequency used for all filter designs
fs = 128
# order = 6

# Filter the raw recording into the bands returned by filt_freq_bands
all_bands_list = filt_freq_bands(raw_df,fs)

#Split into thinking and counting data frames
# Row indices of six consecutive 1280-row segments: even-numbered segments go
# to the thinking list, odd-numbered segments to the counting list.
# NOTE(review): these two index lists are not used again in this cell; the
# actual split below is done with np.vsplit.
think_index_list = []
count_index_list = []

for i in range(6):
    if i%2==0:
        think_index_list+=(list(range(1280*i,1280*(i+1))))
    else:
        count_index_list+=(list(range(1280*i,1280*(i+1))))

think_bands_list = []
count_bands_list = []

for i in range(len(all_bands_list)):
    # Keep the first 6*1280 rows of the band and cut them into 6 equal segments
    df = all_bands_list[i].iloc[0:1280*6]
    df_list = np.vsplit(df,6)
    # Segments 0, 2, 4 are stacked into the thinking frame
    think_df = pd.DataFrame(np.vstack((df_list[0],df_list[2],df_list[4])))
    think_bands_list.append(think_df)
    # Segments 1, 3, 5 are stacked into the counting frame
    count_df = pd.DataFrame(np.vstack((df_list[1],df_list[3],df_list[5])))
    count_bands_list.append(count_df)

# Sanity check: both frames should have the same shape
print(think_bands_list[0].shape)
print(count_bands_list[0].shape)

(3840, 32)
(3840, 32)


In [8]:
#Keep only the theta and alpha bands
#"TA" meaning theta and alpha
#Slice 1:3 picks list positions 1 and 2 of [delta, theta, alpha, beta, gamma]
think_TA_list = think_bands_list[1:3]
count_TA_list = count_bands_list[1:3]

In [9]:
#Splits a single dataframe into list of equally sized arrays
#Each element in list is nx32 array, where n= sample length 
def split_df(df,fs,sample_t,check=False):
    """Cut `df` row-wise into consecutive, equally sized windows.

    df: DataFrame with one row per recorded sample.
    fs: sampling frequency.
    sample_t: window length in seconds; each window has int(sample_t*fs) rows.
    check: when True, print the window bookkeeping for inspection.

    Returns (df_split_list, Ns): the list of window DataFrames and how many
    there are. Trailing rows that do not fill a complete window are dropped.
    If `df` is shorter than one window the result is ([], 0).

    Raises ValueError if sample_t*fs is smaller than one row.
    """
    rows, _ = df.shape
    sample_len = int(sample_t*fs)  # rows per window (truncated towards zero)
    if sample_len < 1:
        raise ValueError(
            "sample_t*fs must be at least 1 row, got " + str(sample_t*fs))
    Ns = rows // sample_len  # number of complete windows

    # Positional slicing always yields DataFrames and avoids np.vsplit, which
    # routes through DataFrame.swapaxes (deprecated in recent pandas) and
    # divides by zero when Ns is 0.
    df_split_list = [
        df.iloc[k*sample_len:(k+1)*sample_len] for k in range(Ns)
    ]

    if check:
        print("Total no. of recorded samples: "+str(rows))
        print("Sample length: "+str(sample_len))
        print("Total no. of samples: "+str(Ns))
        print("Length of df_split_list: "+str(len(df_split_list)))

        if df_split_list:
            if all(isinstance(x.shape,tuple) for x in df_split_list):
                print("Shape of each element in df_split_list: "+str(df_split_list[0].shape))
            else:
                print("Shapes are wrong.")
                for x in df_split_list:
                    print(x.shape)
    return df_split_list,Ns

#Apply split_df() function to list of dataframes, reshape dataframe such that each element is an array 
#for the appropriate sample time length 
def split_bands_list(bands_list,fs,sample_t,check=False,checkSD=False):
    """Window every band DataFrame and pack each window into one row.

    bands_list: list of DataFrames (rows = recorded samples, columns = channels).
    fs, sample_t: forwarded to split_df to define the window length.
    check: print a summary of the result.
    checkSD: forwarded to split_df as its `check` flag.

    Returns a list with one DataFrame per band. Each has one row per window
    and one column per channel; every cell holds the 1-D array of that
    channel's values inside that window.
    """
    df_list_rFE = []  # dataframes list ready for feature extraction
    Ns = 0  # stays 0 when bands_list is empty, so the check below is safe
    for band_df in bands_list:
        df_split_list, Ns = split_df(band_df, fs, sample_t, check=checkSD)
        # Row width follows the window's real column count, not a fixed 32
        list_of_series = [
            [window.iloc[:, j].values for j in range(window.shape[1])]
            for window in df_split_list
        ]
        df_list_rFE.append(pd.DataFrame(list_of_series))
    if check:
        print("Length of bands_list: "+str(len(bands_list)))
        print("Length of df_list_rFE: "+str(len(df_list_rFE)))
        if df_list_rFE and Ns == len(df_list_rFE[0].index):
            print("Shape of each dataframe in df_list_rFE: "+str(df_list_rFE[0].shape))
    return df_list_rFE
            



In [30]:
#Split filtered dataframes into samples
fs = 128 #sampling freq
#split_df truncates sample_t*fs to an int, so 0.05 s at fs=128 gives 6-row windows
sample_t_list = [0.05] #sample time lengths in seconds

#Function to apply split_bands_list function once for each sample time
def apply_multipleSplits(bands_list,fs,sample_t_list,checks=(True,False)):
    """Run split_bands_list once per window length.

    bands_list: list of band DataFrames to window.
    fs: sampling frequency.
    sample_t_list: window lengths in seconds.
    checks: two flags, (check, checkSD), forwarded to split_bands_list.
        The default is an immutable tuple; any 2-item sequence works.

    Returns a list holding one split_bands_list result per entry of
    sample_t_list, in the same order.
    """
    show_summary, show_split_details = checks[0], checks[1]
    return [
        split_bands_list(bands_list, fs, sample_t,
                         check=show_summary, checkSD=show_split_details)
        for sample_t in sample_t_list
    ]


# T_TA_splits_list = apply_multipleSplits(think_TA_list,fs,sample_t_list)

# T_TA_splits_list = split_bands_list(count_TA_list,fs,0.5,check=True,checkSD=False)
# C_bands_split_list = split_bands_list(count_TA_list,fs,sample_t,check=True,checkSD=True)

In [82]:
#Window the thinking theta/alpha frames into 0.05 s samples (one row per window, one column per channel)
T_TA_splits_list = split_bands_list(think_TA_list,fs,0.05,check=True,checkSD=False)

Length of bands_list: 2
Length of df_list_rFE: 2
Shape of each dataframe in df_list_rFE: (640, 32)


In [83]:
#Window the counting theta/alpha frames with the same 0.05 s window length
C_TA_splits_list = split_bands_list(count_TA_list,fs,0.05,check=True,checkSD=False)

Length of bands_list: 2
Length of df_list_rFE: 2
Shape of each dataframe in df_list_rFE: (640, 32)


## Feature Extraction

640


In [84]:
# [theta,alpha]

def pmtm_bands_split_list(bands_list):
    """Replace every window in every DataFrame with a multitaper spectrum.

    bands_list: list of DataFrames whose cells are 1-D sample windows.
    Returns a list of DataFrames of the same layout whose cells hold the
    first half of the averaged spectrum computed from spectrum.pmtm (NW=2.5).
    """
    def _window_spectrum(window):
        estimate = pmtm(window, NW=2.5)
        # NOTE(review): estimate[0] / estimate[1] are presumably the complex
        # eigenspectra and their weights - confirm against the installed
        # `spectrum` version.
        averaged = np.mean((abs(estimate[0])**2).transpose()*estimate[1], axis=1)
        # Keep only the first half of the bins
        return averaged[:int(len(averaged)/2)]

    return [band_df.applymap(_window_spectrum) for band_df in bands_list]
        

#Multitaper spectra for every thinking / counting window (list order follows the [theta, alpha] input)
T_PMTM_df_list = pmtm_bands_split_list(T_TA_splits_list)
C_PMTM_df_list = pmtm_bands_split_list(C_TA_splits_list)

In [85]:
#Get AUC for pmtm features 
def AUC_pmtm(PMTM_df_list):
    """Reduce every spectrum cell to its area under the curve.

    PMTM_df_list: list of DataFrames whose cells are 1-D spectrum arrays.
    Returns a list of DataFrames of the same shape where each cell is the
    trapezoidal integral (unit bin spacing) of the corresponding array.
    """
    # np.trapz with default arguments integrates with a spacing of 1 between bins
    return [spectra_df.applymap(np.trapz) for spectra_df in PMTM_df_list]

#One AUC value per window and channel, for the thinking and counting spectra
T_AUC_PMTM_df_list = AUC_pmtm(T_PMTM_df_list)
C_AUC_PMTM_df_list = AUC_pmtm(C_PMTM_df_list)

In [97]:
#Spot check: AUC of the first window, first channel, in the first frame of the list
T_AUC_PMTM_df_list[0].iloc[0,0]

19.46492788050171

In [86]:
#Expand all lists in cells to their own variables

#Expands for a single PSD_df_list
def expand_PSD_df_list(PSD_df_list, max_freq=64):
    """Flatten array-valued PSD cells into one scalar column per bin.

    PSD_df_list: list of DataFrames (rows = windows, columns = channels)
        whose cells are 1-D arrays of PSD values.
    max_freq: upper end of the frequency axis used only for column labels;
        the default 64 keeps the original labelling.

    Returns a list with one DataFrame per input. Columns are named
    "Ch<channel>_<freq>", where <channel> is the 1-based column position and
    <freq> is taken from np.linspace(0, max_freq, number_of_bins).
    """
    e_PSD_df_list = []
    for psd_df in PSD_df_list:
        # Bin count is read from this frame's own first cell
        no_PSD = len(psd_df.iloc[0, 0])
        # Build the frequency labels once per frame instead of once per column name
        freq_labels = [str(f) for f in np.linspace(0, max_freq, no_PSD)]

        expanded_cols = []
        # Iterate over however many channels the frame really has
        for channel in range(psd_df.shape[1]):
            # Expand each column into its own dataframe (one column per bin)
            new_col = psd_df.iloc[:, channel].apply(pd.Series)
            # Rename every variable in the new column
            new_col.columns = [
                "Ch"+str(channel+1)+'_'+freq_labels[k] for k in new_col.columns
            ]
            expanded_cols.append(new_col)

        # Channels are placed side by side in channel order
        e_PSD_df_list.append(pd.concat(expanded_cols, axis=1))
    return e_PSD_df_list

#Expanded PSD frames: one scalar column per channel and frequency bin
T_e_PMTM_df_list = expand_PSD_df_list(T_PMTM_df_list)
C_e_PMTM_df_list = expand_PSD_df_list(C_PMTM_df_list)



In [88]:
#Sanity check: print the shape of every expanded thinking/counting frame
if len(T_e_PMTM_df_list) == len(C_e_PMTM_df_list):
    print(len(T_e_PMTM_df_list))
    for i in range(len(T_e_PMTM_df_list)):
        #Label each line with the list position only; the extra `j` index
        #was never defined in this cell and raised NameError on a fresh run
        print("T"+str(i)+" shape:"+str(T_e_PMTM_df_list[i].shape))
        print("C"+str(i)+" shape:"+str(C_e_PMTM_df_list[i].shape))

2
T00 shape:(640, 4096)
C00 shape:(640, 4096)
T10 shape:(640, 4096)
C10 shape:(640, 4096)


In [90]:
#Create datasets - both theta and alpha bands will be used

#3 types of datasets to be formed:
#Only PSD features
#Only AUC features 
#Both PSD and AUC features 

#Apply to a single expanded PSD_df_list
def get_1F_combos_df_list(e_PSD_df_list):
    """Join single-feature frames column-wise for each band combination.

    e_PSD_df_list: list of DataFrames, indexed by the positions in `combos`.
    Returns a list with one DataFrame per combination; currently the only
    combination is positions (0, 1) placed side by side.
    """
    #single feature
    combos = [(0,1)]
    return [
        pd.concat([e_PSD_df_list[pos] for pos in combo], axis=1)
        for combo in combos
    ]

#Apply to a list of expanded PSD_df_lists
def mul_get_1F_combos_df_list(sampLen_e_PSD_df_list):
    """Apply get_1F_combos_df_list to every entry of the given list.

    Returns the per-entry results as a list, in input order.
    """
    return [
        get_1F_combos_df_list(e_PSD_df_list)
        for e_PSD_df_list in sampLen_e_PSD_df_list
    ]

#Apply to one set of expanded PSD_df_list and AUC_df_list
def get_2F_combos_df_list(PSD_df_list,AUC_df_list):
    """Join PSD and AUC frames column-wise for each band combination.

    PSD_df_list, AUC_df_list: lists of DataFrames indexed by the positions
    in `combos`.
    Returns a list with one DataFrame per combination. Within a combination
    all selected PSD frames come first, followed by the selected AUC frames.
    """
    #Two features
    combos= [(0,1)]
    combined = []
    for combo in combos:
        parts = [PSD_df_list[pos] for pos in combo]
        parts += [AUC_df_list[pos] for pos in combo]
        combined.append(pd.concat(parts, axis=1))
    return combined

#Apply to multiple sets of expanded PSD_df_list and AUC_df_list
def mul_get_2F_combos_df_list(sampLen_PSD_df_list,sampLen_AUC_df_list):
    """Apply get_2F_combos_df_list to matching PSD / AUC entries.

    sampLen_PSD_df_list, sampLen_AUC_df_list: equally long lists; entry i of
    each is passed to get_2F_combos_df_list together.

    Returns the per-entry results as a list, in input order.
    Raises ValueError if the two lists differ in length.
    """
    if len(sampLen_PSD_df_list) != len(sampLen_AUC_df_list):
        # Fail with a clear message instead of hitting an unbound result variable
        raise ValueError(
            "PSD and AUC lists must have the same length, got "
            + str(len(sampLen_PSD_df_list)) + " and "
            + str(len(sampLen_AUC_df_list)))
    return [
        get_2F_combos_df_list(psd_df_list, auc_df_list)
        for psd_df_list, auc_df_list in zip(sampLen_PSD_df_list,
                                            sampLen_AUC_df_list)
    ]


#List of dataframes with only PMTM features (both frames of each list joined column-wise)
T_PMTM_combos_df_list = get_1F_combos_df_list(T_e_PMTM_df_list)
C_PMTM_combos_df_list = get_1F_combos_df_list(C_e_PMTM_df_list)

#List of dataframes with only AUC features
T_AUC_combos_df_list = get_1F_combos_df_list(T_AUC_PMTM_df_list)
C_AUC_combos_df_list = get_1F_combos_df_list(C_AUC_PMTM_df_list)

#List of dataframes with both features (PSD columns first, then AUC columns)
T_AP_combos_df_list = get_2F_combos_df_list(T_e_PMTM_df_list,T_AUC_PMTM_df_list)
C_AP_combos_df_list = get_2F_combos_df_list(C_e_PMTM_df_list,C_AUC_PMTM_df_list)



In [92]:
#Number of band combinations produced (one per entry of `combos`)
print(len(T_AP_combos_df_list))

1


In [95]:
#Feature scaling all dataframes 

#Applies feature scaling to one combos_df_list
def featureScaling_df(combos_df_list):
    """Standardise every DataFrame in the list independently.

    Each frame is passed through StandardScaler.fit_transform, so the scaler
    is refitted per frame. Column names are kept; the returned frames get a
    fresh default row index.
    Returns the list of scaled DataFrames, in input order.
    """
    scaler = StandardScaler()
    scaled_frames = []
    for frame in combos_df_list:
        scaled_values = scaler.fit_transform(frame)
        scaled_frames.append(pd.DataFrame(scaled_values, columns=frame.columns))
    return scaled_frames

#Standardise each feature set. Thinking and counting frames are scaled by
#separate calls, so each gets its own fitted mean/variance.
#NOTE(review): scaling happens per class and before the train/test split -
#confirm this is intended.
sc_T_PMTM_combos_df_list = featureScaling_df(T_PMTM_combos_df_list)
sc_C_PMTM_combos_df_list = featureScaling_df(C_PMTM_combos_df_list)

sc_T_AUC_combos_df_list = featureScaling_df(T_AUC_combos_df_list)
sc_C_AUC_combos_df_list = featureScaling_df(C_AUC_combos_df_list)

sc_T_AP_combos_df_list = featureScaling_df(T_AP_combos_df_list)
sc_C_AP_combos_df_list = featureScaling_df(C_AP_combos_df_list)

def check_combos_shapes(combos_df_list):
    """Print the shape of every item in the list, one per line."""
    for frame in combos_df_list:
        print(frame.shape)

#Print shapes of the scaled frames: PSD-only, then AUC-only, then PSD+AUC
check_combos_shapes(sc_T_PMTM_combos_df_list)
check_combos_shapes(sc_C_PMTM_combos_df_list)
print("------")
check_combos_shapes(sc_T_AUC_combos_df_list)
check_combos_shapes(sc_C_AUC_combos_df_list)
print("------")
check_combos_shapes(sc_T_AP_combos_df_list)
check_combos_shapes(sc_C_AP_combos_df_list)

(640, 8192)
(640, 8192)
------
(640, 64)
(640, 64)
------
(640, 8256)
(640, 8256)


In [98]:
#Row counts (number of windows) of the first frame in each scaled list
print(len(sc_T_PMTM_combos_df_list[0]))
print(len(sc_C_PMTM_combos_df_list[0]))
print(len(sc_T_AUC_combos_df_list[0]))
print(len(sc_C_AUC_combos_df_list[0]))
print(len(sc_T_AP_combos_df_list[0]))
print(len(sc_C_AP_combos_df_list[0]))

640
640
640
640
640
640


In [100]:
#Append action type columns to all dataframes 

#Add action column for a list of dataframes
def add_action_col(df_list,action_type):
    """Return copies of the given DataFrames with an 'Action' label column.

    df_list: list of DataFrames.
    action_type: label written into every row of the new 'Action' column.

    Returns a new list of DataFrames; the input frames are left untouched.
    """
    new_list = []
    for frame in df_list:
        # Work on an explicit copy: adding a column to a `frame[:]` slice
        # triggers pandas' SettingWithCopyWarning and leaves it ambiguous
        # whether the caller's frame is affected.
        new_df = frame.copy()
        new_df['Action'] = pd.Series(action_type,index=frame.index) #add new column
        new_list.append(new_df)
    return new_list

#Label every row: 'T' for thinking frames, 'C' for counting frames
sc_T_PSD_combosA_df_list = add_action_col(sc_T_PMTM_combos_df_list,'T')
sc_C_PSD_combosA_df_list = add_action_col(sc_C_PMTM_combos_df_list,'C')

sc_T_AUC_combosA_df_list = add_action_col(sc_T_AUC_combos_df_list,'T')
sc_C_AUC_combosA_df_list = add_action_col(sc_C_AUC_combos_df_list,'C')

sc_T_AP_combosA_df_list = add_action_col(sc_T_AP_combos_df_list,'T')
sc_C_AP_combosA_df_list = add_action_col(sc_C_AP_combos_df_list,'C')

def check_shapes(combos_df_list_f):
    """Print the shape of every item in the list, one per line."""
    for frame in combos_df_list_f:
        print(frame.shape)

#Print shapes after adding the 'Action' column (one more column than before)
check_shapes(sc_T_PSD_combosA_df_list)
check_shapes(sc_C_PSD_combosA_df_list)
print("----")
check_shapes(sc_T_AUC_combosA_df_list)
check_shapes(sc_C_AUC_combosA_df_list)
print("----")
check_shapes(sc_T_AP_combosA_df_list)
check_shapes(sc_C_AP_combosA_df_list)



(640, 8193)
(640, 8193)
----
(640, 65)
(640, 65)
----
(640, 8257)
(640, 8257)


In [101]:
#Combine different class types to form full datasets
def concatSave_df_list(T_df_list,C_df_list,sampLenStrings,filename,savedir,save=False):
    """Stack matching thinking / counting frames and optionally save them.

    T_df_list, C_df_list: equally long lists of DataFrames; entry i of each
        is concatenated row-wise (thinking rows first).
    sampLenStrings: one string per entry, used as the file-name suffix.
    filename: file-name prefix.
    savedir: output directory as a string ending with a path separator.
    save: when True, write every combined frame to
        '<savedir><filename>_<suffix>.pkl' and additionally export the first
        one as .csv. When False, nothing is written.

    If the two lists differ in length a message is printed and nothing is done.
    Returns None.
    """
    if len(C_df_list) != len(T_df_list):
        print("Lists are of unequal lengths.")
        return
    for i, (t_df, c_df) in enumerate(zip(T_df_list, C_df_list)):
        new_df = pd.concat([t_df, c_df], axis=0)
        if not save:
            # No file output of any kind unless saving was requested
            continue
        stem = savedir+filename+'_'+ sampLenStrings[i]
        #Save to external HDD as pkl files
        new_df.to_pickle(stem+'.pkl')
        if i == 0:
            # Only the first dataset also gets a .csv export
            new_df.to_csv(stem+'.csv')

#File-name suffix per dataset (one window length is used here)
sampLenStrings = ["0.05"]
#Raw strings keep the Windows backslashes literal without relying on
#unrecognised escape sequences such as "\E" or "\m"; the path values are unchanged
AUC_savedir = r"F:\EEG-data\think-count\multitaper\featureScaled\AUC/"
PSD_savedir = r"F:\EEG-data\think-count\multitaper\featureScaled\PSD/"
AP_savedir = r"F:\EEG-data\think-count\multitaper\featureScaled\AUC_PSD/"
AUC_filename = "AUC_df"
PSD_filename = "PSD_df"
AP_filename = "AP_df"

#PSD
concatSave_df_list(sc_T_PSD_combosA_df_list,sc_C_PSD_combosA_df_list,sampLenStrings,PSD_filename,PSD_savedir,save=True)
#AUC
concatSave_df_list(sc_T_AUC_combosA_df_list,sc_C_AUC_combosA_df_list,sampLenStrings,AUC_filename,AUC_savedir,save=True)
#AUC PSD
concatSave_df_list(sc_T_AP_combosA_df_list,sc_C_AP_combosA_df_list,sampLenStrings,AP_filename,AP_savedir,save=True)

## SVM, Cross Validation, Evaluation

In [2]:
# Directories to load feature scaled datasets
# Raw strings keep the Windows backslashes literal without relying on
# unrecognised escape sequences such as "\E" or "\m"; the values are unchanged
loaddir = r'F:\EEG-data\think-count\multitaper\featureScaled/'
loadAUC_dir = loaddir+'AUC/'
loadPSD_dir = loaddir+'PSD/'
loadAUC_PSD_dir = loaddir+'AUC_PSD/'

# Directories to save results
savedir = r"F:\EEG-data\think-count\multitaper\PCA_CV_results/"
saveAUC_dir = savedir+'AUC/'
savePSD_dir = savedir+'PSD/'
saveAUC_PSD_dir = savedir+'AUC_PSD/'

In [3]:
# Cross-validation for SVM
## PCA is fitted on the training set only, keeping enough components to retain 99% of the variance
## 80% training set, 20% test set
## 5-fold cross-validation inside the grid search
def apply_PCA_CV_SVM(loaddir,savedir):
    """Grid-search an RBF SVM on PCA-reduced features for each dataset in `loaddir`.

    For every ``*.pkl`` DataFrame found (all columns but the last are
    features, the last column is the class label) this:
      1. holds out a stratified 20% test split,
      2. fits PCA (99% explained variance) on the training part only and
         applies the same projection to the test part,
      3. runs a 5-fold GridSearchCV over C / gamma, ranked by macro F1,
      4. retrains an SVC with the best parameters and scores the test split.

    loaddir: directory (string ending with a separator) holding the datasets.
    savedir: directory (string ending with a separator) for the CV results;
        test reports, confusion-matrix images and summaries are written to
        ``savedir + 'testResults/'``, which is created if missing.

    Raises FileNotFoundError if `loaddir` contains no ``.pkl`` files.
    Returns None.
    """
    files = glob.glob(loaddir+'*.pkl')
    if not files:
        # Stop before writing empty summary files
        raise FileNotFoundError("No .pkl datasets found in "+loaddir)

    testdir = savedir+'testResults/'
    # Creates savedir as well if it does not exist yet
    os.makedirs(testdir, exist_ok=True)

    sss = StratifiedShuffleSplit(n_splits=5,test_size=0.2,random_state=0)
    bestF1 = {'F1 Score':0,'dataset':'','params':''}
    test_results = {'F1 Score':[],'dataset':[],'params':[]}
    #Create PCA instances
    pca99 = PCA(n_components=0.99, svd_solver='full')
    #Instantiate SVM gridsearch
    C_range = [1,3,10,30,100,300,1000]
    param_grid= [
                # {'C': C_range, 'kernel': ['linear']},
                {'C': C_range, 'gamma': [0.001, 0.0001, 'auto', 'scale'], 'kernel': ['rbf']},
                ]
    # A list keeps the scorer order (and the result columns) deterministic
    scoring = ['accuracy','f1_macro']
    clf = svm.SVC()
    grid = GridSearchCV(clf,param_grid=param_grid,scoring=scoring,cv=5,refit='f1_macro')

    target_names = ['T','C']
    pca_tag = '99'

    #Apply PCA and Gridsearch
    for file in files:
        # Dataset name = file name without directory and without the .pkl
        # suffix (splitext removes the suffix only, unlike rstrip('.pkl'))
        stem = os.path.splitext(os.path.basename(file))[0]
        cv99_filename = stem+'_PCA99_CV_results.pkl'
        # NOTE: unpickling executes code from the file - only load datasets
        # produced by this pipeline.
        df = pd.read_pickle(file)
        X = df.iloc[:,:-1].values
        y = df.iloc[:,-1].values

        #Split into training and test sets
        # Five splits are generated but only the last one is used, which
        # keeps the same train/test partition as earlier runs.
        *_, (train_index, test_index) = sss.split(X,y)
        X_train, X_test = X[train_index],X[test_index]
        y_train, y_test = y[train_index],y[test_index]

        #Fit PCA on the training data and project it
        x_99_train = pca99.fit_transform(X_train)
        #Project the test data with the PCA fitted on the training data
        x_99_test = pca99.transform(X_test)
        #Gridsearch
        grid99 = grid.fit(x_99_train,y_train)
        #Get results
        results_df99 = pd.DataFrame(grid99.cv_results_)
        #Append number of principal components
        results_df99['No. of PC'] = x_99_train.shape[1]
        #Save results
        results_df99.to_pickle(savedir+cv99_filename)
        #Export as csv
        results_df99.to_csv(savedir+cv99_filename+'.csv')

        #Fit SVM on best parameters (rank 1 on macro F1)
        best_params99 = results_df99.loc[results_df99['rank_test_f1_macro'].idxmin()]['params']
        clf99 = svm.SVC(**best_params99)
        clf99.fit(x_99_train,y_train)
        y_99_pred = clf99.predict(x_99_test)

        #Generate and save classification report, macro-avg f1 score, confusion matrix
        df_name = stem+'_'+pca_tag
        report_name = df_name+'_PCA_CV_SVM_clf_report'

        ##Classification report
        # `labels` pins the class order to match `target_names`; without it the
        # classes are sorted ('C' before 'T') and the row names get swapped.
        report = classification_report(
            y_test, y_99_pred, labels=target_names,
            target_names=target_names, output_dict=True)
        report_df = pd.DataFrame(report).transpose()
        report_df.to_pickle(testdir+report_name+'.pkl')

        ##Macro-avg f1 score
        f1Score = report['macro avg']['f1-score']
        test_results['F1 Score'].append(f1Score)
        test_results['dataset'].append(df_name)
        test_results['params'].append(best_params99)
        if bestF1['F1 Score']<f1Score:
            bestF1['F1 Score'] = f1Score
            bestF1['dataset'] = df_name
            bestF1['params'] = str(best_params99)

        ##Confusion matrix
        # NOTE(review): plot_confusion_matrix was removed in scikit-learn 1.2;
        # newer versions provide ConfusionMatrixDisplay.from_estimator instead.
        fig,ax = plt.subplots()
        ax.set_title(df_name+' CM')
        plot_confusion_matrix(clf99,x_99_test,y_test,labels=target_names,ax=ax,normalize='true')
        fig.savefig(testdir+df_name+'.png')
        plt.close(fig)

    bestF1_df = pd.DataFrame(bestF1,index=[0],columns=['F1 Score','dataset','params'])
    bestF1_df.to_csv(testdir+bestF1['dataset']+'.csv')
    test_results_df = pd.DataFrame(test_results)
    test_results_df.to_csv(testdir+'test_results.csv')
    # The .pkl file is a real pickle (it used to be written with to_csv)
    test_results_df.to_pickle(testdir+'test_results.pkl')
        


#Run the PCA + grid-search + test evaluation once per feature set (AUC, PSD, AUC+PSD)
apply_PCA_CV_SVM(loadAUC_dir,saveAUC_dir)
apply_PCA_CV_SVM(loadPSD_dir,savePSD_dir)
apply_PCA_CV_SVM(loadAUC_PSD_dir,saveAUC_PSD_dir)
