## Prepare previous data

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import time
import os

# our own pipeline
from pipelines.data_prapare import pack_data
from pipelines.tools import plot_intervals
from pipelines.tools import power_band, one_signal_band_power, power_band_timeslice

from sklearn.preprocessing import MinMaxScaler

In [2]:
import warnings
# Silence every warning for the rest of the session. This keeps cell output
# short but also hides deprecation and numerical warnings.
warnings.filterwarnings("ignore")

In [3]:
# Build the full path of every file found in the raw-data directory.
data_source_path = "./data/Tests_EEG_Lintao/"
filename_list = os.listdir(data_source_path)
path_list = [os.path.join(data_source_path, filename) for filename in filename_list]

# pack_data returns the two recording lists followed by four trial lists
# (alphabet/asl x vision/imagination).
(
    alphabet_list,
    asl_list,
    alphabet_vision,
    alphabet_imagination,
    asl_vision,
    asl_imagination,
) = pack_data(path_list)

# The label is the last field of each trial entry; all four trial lists share
# the same label order, so the alphabet/vision list is used.
labels = [trial[-1] for trial in alphabet_vision]

In [4]:
# Root directory of the generated feature tables and its band-power sub-directory.
feature_path = "./data/EEG_features_Lintao/"
bp_feature_path = "{}band_power/".format(feature_path)

In [5]:
# Frequency-band limits as (low, high) in Hz; alternative limits are noted inline.
delta_BP = (0.5, 4)     # alternatives: (0.1, 4), (0.3, 4)
theta_BP = (4, 8)
alpha_BP = (8, 13)
beta_BP = (13, 30)      # alternative: (13, 32)
gamma_BP = (30, 100)    # alternatives: (32, 100), (32, inf)

# Display names of the five bands, in the same order as above.
band_name = list("δθαβγ")

In [6]:
# First vision trial of the alphabet task. When this cell was written it was
# (0, (2283, 2642), 'vision', 'alphabet', 9, 'I').
alphabet_1 = alphabet_vision[0]

# Rows 2283..2642 (inclusive) of the first recording, first 16 columns.
first_row, last_row = 2283, 2642
example = alphabet_list[0].iloc[first_row:last_row + 1, 0:16]
example

Unnamed: 0,ch1,ch2,ch3,ch4,ch5,ch6,ch7,ch8,ch9,ch10,ch11,ch12,ch13,ch14,ch15,ch16
2283,-3350.615901,-6455.139095,2773.650321,-117.279603,-2065.480002,-4420.459797,-4684.053920,-4490.465461,7000.633419,-1214.414980,3737.390487,10966.145809,-3555.223770,-2529.926900,-2888.582991,1203.574384
2284,-3354.527456,-6465.622063,2772.309217,-116.653754,-2067.178734,-4421.063295,-4684.120975,-4491.448938,7000.879288,-1209.497596,3733.747153,10953.181798,-3553.547389,-2532.318536,-2886.169003,1206.100131
2285,-3345.765572,-6459.832962,2773.963246,-118.061914,-2067.670473,-4421.845606,-4685.439728,-4493.237077,6992.676198,-1226.842550,3735.579996,10955.707545,-3556.676633,-2541.214531,-2889.074730,1202.903831
2286,-3356.047375,-6460.257645,2774.075004,-120.878234,-2062.596627,-4420.839777,-4683.942161,-4490.353702,6997.504174,-1199.171090,3735.736458,10955.394620,-3551.960415,-2521.992030,-2886.191355,1204.669619
2287,-3348.224264,-6460.794087,2773.672673,-115.603222,-2070.173868,-4422.247937,-4689.910077,-4494.712293,6996.274828,-1208.491767,3735.334126,10954.791123,-3553.905017,-2524.316612,-2887.666570,1203.999067
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2638,-3339.060049,-6440.610461,2805.970944,-137.776153,-2030.119542,-4433.602623,-4729.271499,-4534.476046,7047.616785,-1237.973718,3767.967673,10911.495794,-3520.802083,-2536.766533,-2891.198145,1191.526793
2639,-3350.973529,-6447.427743,2804.831005,-142.782944,-2026.856187,-4434.004955,-4726.812807,-4532.084409,7048.488504,-1250.691861,3765.017243,10883.265541,-3517.359914,-2533.525531,-2887.912439,1193.761968
2640,-3333.270947,-6437.816493,2806.932069,-138.357298,-2033.449952,-4435.591928,-4733.853606,-4537.962918,7049.404925,-1241.818218,3765.866609,10894.776689,-3520.064476,-2524.473074,-2890.147613,1191.638552
2641,-3343.351584,-6443.471485,2807.110883,-140.614824,-2025.716248,-4435.524873,-4727.304545,-4533.313755,7055.529303,-1245.103925,3770.336958,10897.950637,-3516.689362,-2530.977432,-2889.164137,1190.856241


So far, some ways to generate features for model training:

1. pure power band(three methods)
2. generate own EEG image with positions(should be adapted)
3. use MNE EEG images
4. consider time features, use time step power band
5. consider time features, use time step power band images

Some of the code in the sections below is commented out; those cells do not need to be re-run.

## features tables

The references that helped with the code below are:

* https://github.com/JingweiToo/EEG-Feature-Extraction-Toolbox

* https://github.com/CisottoGiulia/EEG-EMG-analytics

* https://github.com/yangsh827/Seizure_FE

* https://github.com/sari-saba-sadiya/EEGExtract

* https://github.com/N-kalaivaani-IT/EEG-Feature-Extraction

* https://github.com/forrestbao/pyeeg; https://github.com/shaikhsadaf/Feature-Extraction-of-EEG-Signals

* https://github.com/raphaelvallat/antropy




After reading the references above, I ended up with three .py files for EEG feature generation:

1. EEGExtract.py: the original code with only a mean-frequency function added; some of its functions are duplicated or not useful here
2. eeg_features.py: a combined module with many features; LLE is not used
3. Non_use_fe.py: other code that is not used in our case

Note that some features are computed from the normalized signals instead of the original ones, because the original signals would otherwise cause problems. This does not matter,
because all features pass through the sklearn normalization function before we train a model.

In [7]:
import pipelines.EEGExtract as eeg_et
import pipelines.eeg_features as eeg_fe 
import glob

### eeg_feature

In [8]:
# %%timeit
# # ant LZC is too slow
# eeg_fe.LZC(np.array(example['ch1']))

In [9]:
# eeg_fe.LLE(np.array(example['ch1']),2,4)

In [10]:
# Quick check of one feature function on a single channel of the example segment.
eeg_fe.cardinality(np.array(example['ch12']))

337.0

In [11]:
from sklearn.preprocessing import MinMaxScaler
def eeg_feature(data, sf=125):
    """Extract the ``eeg_features``-based statistics of every channel.

    Parameters
    ----------
    data : 2-D array
        Segment with one channel per column; channel ``i`` is ``data[:, i]``.
    sf : int, optional
        Not read anywhere in this function.

    Returns
    -------
    dict
        Maps ``'ch<k>_<feature>'`` to the feature value, where ``k`` counts
        the channels from 1.
    """
    result = dict()
    # Column-wise min-max scaled copy of the segment; only EMAV reads it.
    mm_data = MinMaxScaler().fit_transform(data)

    diff_names = ('sum_diff', 'min_diff', 'max_diff', 'mean_diff', 'median_diff')
    band_names = ('delta', 'theta', 'alpha', 'beta', 'gamma')

    for channel_index in range(data.shape[-1]):
        ch = data[:, channel_index]
        mm_ch = mm_data[:, channel_index]
        prefix = 'ch' + str(channel_index + 1) + '_'

        # Basic amplitude statistics.
        result[prefix + 'min'] = eeg_fe.min_X(ch)
        result[prefix + 'max'] = eeg_fe.max_X(ch)
        result[prefix + 'std'] = eeg_fe.std_X(ch)
        result[prefix + 'mean'] = eeg_fe.mean_X(ch)
        result[prefix + 'coefficient_variation'] = eeg_fe.coefficient_variation(ch)
        result[prefix + 'mean_abs'] = eeg_fe.mean_absolute_X(ch)
        result[prefix + 'AAC'] = eeg_fe.average_amplitude_change(ch)
        result[prefix + 'CARD'] = eeg_fe.cardinality(ch)

        # EMAV is the only feature taken from the min-max scaled channel.
        result[prefix + 'EMAV'] = eeg_fe.enhanced_mean_absolute(mm_ch)
        result[prefix + 'median'] = eeg_fe.median_X(ch)
        result[prefix + 'MAP'] = eeg_fe.mean_amplitude_power(ch)
        result[prefix + 'signal_energy'] = eeg_fe.signal_energy(ch)
        result[prefix + 'mean_energy'] = eeg_fe.mean_energy(ch)
        result[prefix + 'waveform_length'] = eeg_fe.waveform_length(ch)

        # eeg_fe.enhanced_wave_length (EML) is skipped: it returned NaN everywhere.

        # First- and second-order differences; each call returns
        # (sum, min, max, mean, median) in that order.
        diff_1 = eeg_fe.first_order_diff(ch)
        diff_2 = eeg_fe.second_order_diff(ch)
        for order, diff in (('1', diff_1), ('2', diff_2)):
            for position, diff_name in enumerate(diff_names):
                result[prefix + order + '_' + diff_name] = diff[position]

        result[prefix + 'log_energy_entropy'] = eeg_fe.log_energy_entropy(ch)
        # Renyi entropy values are very close to each other.
        result[prefix + 'renyi_entropy'] = eeg_fe.renyi_entropy(ch)
        result[prefix + 'LRSSV'] = eeg_fe.log_root_sum_of_sequential_Variation(ch)
        result[prefix + 'MCL'] = eeg_fe.mean_curve_length(ch)
        result[prefix + 'mean_teager_energy'] = eeg_fe.mean_teager_energy(ch)
        result[prefix + 'var'] = eeg_fe.var_X(ch)
        result[prefix + 'totalVariation'] = eeg_fe.totalVariation(ch)
        result[prefix + 'skew'] = eeg_fe.skew_X(ch)
        result[prefix + 'kurtosis'] = eeg_fe.kurs_X(ch)
        result[prefix + 'rms'] = eeg_fe.rms_X(ch)
        result[prefix + 'peak'] = eeg_fe.peak_X(ch)
        # PAPR values are close to each other.
        result[prefix + 'PAPR'] = eeg_fe.papr_X(ch)

        # eeg_fe.relativePower (wavelet power pEA5, pED5 ... pED1) is skipped
        # for now: wavelet power is not a standard power.

        result[prefix + 'wavelet_entropy'] = eeg_fe.wavelet_entopy(ch)
        result[prefix + 'hurst'] = eeg_fe.Hurst(ch)
        # PFD values are close to each other.
        result[prefix + 'PFD'] = eeg_fe.Petrosian_FD(ch)

        result[prefix + 'sample_entropy'] = eeg_fe.sample_entropy(ch)

        # permutation_entropy returns two variants; index 1 (ant) is kept.
        PE = eeg_fe.permutation_entropy(ch)
        result[prefix + 'ant_permutation_entropy'] = PE[1]

        hjorth = eeg_fe.Hjorth(ch)
        result[prefix + 'hjorth_activity'] = hjorth[0]
        result[prefix + 'hjorth_mobility'] = hjorth[1]
        result[prefix + 'hjorth_complexity'] = hjorth[-1]

        result[prefix + 'KFD'] = eeg_fe.KFD(ch)

        # DFA also returns two variants; index 0 (ant) is kept.
        DFA = eeg_fe.DFA(ch)
        result[prefix + 'DFA'] = DFA[0]

        result[prefix + 'HFD'] = eeg_fe.HFD(ch)
        result[prefix + 'shannon_entropy'] = eeg_fe.shannon_entropy(ch)
        result[prefix + 'spectral_entropy'] = eeg_fe.spectral_entropy(ch)
        result[prefix + 'approximate_entropy'] = eeg_fe.approximate_entropy(ch)
        result[prefix + 'svd_entropy'] = eeg_fe.svd_entropy(ch)

        # Zero-crossing count was 0 everywhere on the example segment.
        result[prefix + 'num_zero_crossing'] = eeg_fe.num_zero_crossing(ch)

        # Own LZC implementation; the ant one is too slow.
        result[prefix + 'LZC'] = eeg_fe.LZC(ch)

        # Welch band power, one value per band, plus the alpha/delta ratio.
        pb = one_signal_band_power(ch, method='welch')
        for position, band in enumerate(band_names):
            result[prefix + 'pb_' + band] = pb[position]
        result[prefix + 'alpha/delta'] = pb[2] / pb[0]

    return result

In [12]:
# fe1 = eeg_feature(np.array(example))
# len(fe1)
# 944

In [13]:
# fe1

### EEG extract
####  Complexity Features

In [14]:
# eegData: 3D np array [chans x ms x epochs]
# `example` is laid out as (samples, channels), so it must be transposed before
# the single-epoch axis is added. A bare reshape(16, -1, 1) keeps the row-major
# element order and fills each "channel" row with samples from every channel.
fs = 125
eegData = np.array(example).T.reshape(16,-1,1)
eegData.shape

(16, 360, 1)

In [15]:
# Tsalis Entropy (n=1)

# alphabet_2 = alphabet_vision[2] 
# # alphabet_2
# # (0, (3064, 3424), 'vision', 'alphabet', 6, 'F')
# example_2 = alphabet_list[0].iloc[3786:4206,0:16]
# mm = MinMaxScaler()
# mm_data = mm.fit_transform(example_2)
# eegData_2 = np.array(mm_data).reshape(16,-1,1)

# signals need to be normalized, otherwise sometimes there will be an error
# Only order 1 is evaluated; orders 1..10 would be list(range(1, 10 + 1)).
orders = [1]
tsalisRes = np.array(
    eeg_et.tsalisEntropy(eegData, bin_min=-200, bin_max=200, binWidth=2, orders=orders)
)
tsalisRes.shape

(1, 16, 1)

In [16]:
# Subband Information Quantity: band-pass filter the segment, then take the
# Shannon entropy of the filtered signal with the same histogram settings.
shannon_bins = dict(bin_min=-200, bin_max=200, binWidth=2)

# delta (0.5–4 Hz)
eegData_delta = eeg_et.filt_data(eegData, 0.5, 4, fs)
ShannonRes_delta = eeg_et.shannonEntropy(eegData_delta, **shannon_bins)

# theta (4–8 Hz)
eegData_theta = eeg_et.filt_data(eegData, 4, 8, fs)
ShannonRes_theta = eeg_et.shannonEntropy(eegData_theta, **shannon_bins)

# alpha (8–13 Hz)
eegData_alpha = eeg_et.filt_data(eegData, 8, 13, fs)
ShannonRes_alpha = eeg_et.shannonEntropy(eegData_alpha, **shannon_bins)

# beta (13–30 Hz)
eegData_beta = eeg_et.filt_data(eegData, 13, 30, fs)
ShannonRes_beta = eeg_et.shannonEntropy(eegData_beta, **shannon_bins)

# gamma (30–100 Hz); fs must exceed 2*high, so the upper limit used is 60
eegData_gamma = eeg_et.filt_data(eegData, 30, 60, fs)
ShannonRes_gamma = eeg_et.shannonEntropy(eegData_gamma, **shannon_bins)
# ShannonRes_delta.shape

In [17]:
# Cepstrum Coefficients (n=2)
# eeg_feature_2 later reshapes this result to one row of two values per channel.
CepstrumRes = eeg_et.mfcc(eegData, fs,order=2)
# CepstrumRes.shape

In [18]:
# Lyapunov Exponent
# Presumably one value per channel and epoch (eeg_feature_2 reads it as [i, 0]) — TODO confirm.
LyapunovRes = eeg_et.lyapunov(eegData)
# LyapunovRes.shape

In [19]:
# Every channel yields 0 for this example segment.

# False Nearest Neighbor
FalseNnRes = eeg_et.falseNearestNeighbor(eegData)
FalseNnRes.shape

(16, 1)

In [20]:
# Not working
# ARMA Coefficients (n=2)
# armaRes = eeg_et.arma(eegData,order=13)

####  Category Features

In [21]:
# Median Frequency
# fs is the sampling frequency defined above (125).
medianFreqRes = eeg_et.medianFreq(eegData,fs)
# medianFreqRes.shape

In [22]:
# Mean Frequency (the function that was added to the original EEGExtract code)
meanFreqRes = eeg_et.meanFreq(eegData,fs)
# meanFreqRes

In [23]:
# Cannot compute because fs < 2*100

# # δ band Power
# bandPwr_delta = eeg_et.bandPower(eegData, 0.5, 4, fs)
# # θ band Power
# bandPwr_theta = eeg_et.bandPower(eegData, 4, 8, fs)
# # α band Power
# bandPwr_alpha = eeg_et.bandPower(eegData, 8, 13, fs)
# # β band Power
# bandPwr_beta = eeg_et.bandPower(eegData, 13, 30, fs)
# # γ band Power
# bandPwr_gamma = eeg_et.bandPower(eegData, 30, 60, fs)


# α/δ Ratio
# ratio_res = eeg_et.eegRatio(eegData,fs)

In [24]:
# Regularity (burst-suppression)
# Listed further down as one of the EEGExtract features that is useful in our case.
regularity_res = eeg_et.eegRegularity(eegData,fs)
# regularity_res.shape

In [25]:
# All are nan, pass

# # Voltage < 5μ
# volt05_res = eeg_et.eegVoltage(eegData,voltage=5)
# # Voltage < 10μ
# volt10_res = eeg_et.eegVoltage(eegData,voltage=10)
# # Voltage < 20μ
# volt20_res = eeg_et.eegVoltage(eegData,voltage=20)


# # Burst Band Power for δ
# burstBandPwrAlpha = eeg_et.burstBandPowers(eegData, 0.5, 4, fs)
# burstBandPwrAlpha

In [26]:
# all are 0

# # Diffuse Slowing
# df_res = eeg_et.diffuseSlowing(eegData)
# df_res

# # Spikes
# minNumSamples = int(70*fs/1000)
# spikeNum_res = eeg_et.spikeNum(eegData,minNumSamples)
# spikeNum_res

# # Delta burst after Spike
# deltaBurst_res = eeg_et.burstAfterSpike(eegData,eegData_delta,minNumSamples=7,stdAway = 3)
# deltaBurst_res

# # Sharp spike
# sharpSpike_res = eeg_et.shortSpikeNum(eegData,minNumSamples)
# sharpSpike_res

# # Number of Bursts
# numBursts_res = eeg_et.numBursts(eegData,fs)
# numBursts_res

# # Burst length μ and σ
# burstLenMean_res,burstLenStd_res = eeg_et.burstLengthStats(eegData,fs)
# burstLenStd_res

# # Number of Suppressions
# numSupps_res = eeg_et.numSuppressions(eegData,fs)
# numSupps_res

# # Suppression length μ and σ
# suppLenMean_res,suppLenStd_res = eeg_et.suppressionLengthStats(eegData,fs)
# suppLenStd_res

In [27]:
# all same 1, so pass
# # Connectivity features- Coherence - δ
# coherence_res = eeg_et.coherence(eegData,fs)
# coherence_res

So for EEGExtract.py, the useful features in our case are:

Tsallis Entropy, Subband Information Quantity, Cepstrum Coefficients, Lyapunov Exponent, Median Frequency, Regularity (burst-suppression)

In [28]:
def eeg_feature_2(data, fs=125):
    """Compute the EEGExtract-based features of one EEG segment.

    Parameters
    ----------
    data : array-like, shape (n_samples, n_channels)
        Segment with one column per channel (16 columns in this notebook).
    fs : int, optional
        Sampling frequency handed to the EEGExtract routines.

    Returns
    -------
    dict
        Maps ``'ch<k>_<feature>'`` (``k`` counts channels from 1) to a scalar.
        Keys are inserted feature group by feature group, channel by channel.
    """
    data = np.array(data)
    n_channels = data.shape[1]
    result = dict()

    def as_eeg_extract_input(samples_by_channel):
        # EEGExtract expects [chans x ms x epochs]. The input is laid out as
        # (samples, channels), so it has to be transposed first: a bare
        # reshape(n_channels, -1, 1) keeps the row-major element order and
        # would fill each "channel" row with samples from every channel.
        return np.array(samples_by_channel).T.reshape(n_channels, -1, 1)

    def store(*named_values):
        # For each channel, write one entry per (name, per-channel values) pair.
        for ch in range(n_channels):
            for name, values in named_values:
                result['ch' + str(ch + 1) + '_' + name] = values[ch]

    eegData = as_eeg_extract_input(data)

    # Tsallis entropy: signals need to be normalized, otherwise there is
    # sometimes an error, so the column-wise min-max scaled signal is used.
    mm_eegData = as_eeg_extract_input(MinMaxScaler().fit_transform(data))
    orders = [1]  # list(range(1,10+1))
    tsalisRes = eeg_et.tsalisEntropy(mm_eegData, bin_min=-200, bin_max=200, binWidth=2, orders=orders)
    store(('tsalis_entropy', np.array(tsalisRes).reshape(n_channels, )))

    # Subband Information Quantity: Shannon entropy of each band-pass filtered
    # signal (delta, theta, alpha, beta, gamma). fs should be > 2*high, so the
    # gamma band stops at 60.
    band_limits = ((0.5, 4), (4, 8), (8, 13), (13, 30), (30, 60))
    band_entropies = []
    for band_number, (low, high) in enumerate(band_limits, start=1):
        filtered = eeg_et.filt_data(eegData, low, high, fs)
        entropy = eeg_et.shannonEntropy(filtered, bin_min=-200, bin_max=200, binWidth=2)
        band_entropies.append(('PB_SE_' + str(band_number), entropy[:, 0]))
    store(*band_entropies)

    # Cepstrum Coefficients (n=2)
    CepstrumRes = np.array(eeg_et.mfcc(eegData, fs, order=2)).reshape(n_channels, 2)
    store(('cepstrum_1', CepstrumRes[:, 0]), ('cepstrum_2', CepstrumRes[:, 1]))

    # Lyapunov Exponent
    store(('lyapunov_exponent', eeg_et.lyapunov(eegData)[:, 0]))

    # Median Frequency (the mean frequency from eeg_et.meanFreq is not stored)
    store(('median_frequency', eeg_et.medianFreq(eegData, fs)[:, 0]))

    # Regularity (burst-suppression)
    store(('regularity', eeg_et.eegRegularity(eegData, fs)[:, 0]))

    # Many of the features below were all 0 on the example segment.

    # False Nearest Neighbor
    store(('FNN', eeg_et.falseNearestNeighbor(eegData)[:, 0]))

    # Diffuse Slowing
    store(('diffuse_slowing', eeg_et.diffuseSlowing(eegData)[:, 0]))

    # Spikes: minimum spike length of 70 ms expressed in samples
    minNumSamples = int(70*fs/1000)
    store(('spikes', eeg_et.spikeNum(eegData, minNumSamples)[:, 0]))

    # Delta burst after spike (eeg_et.burstAfterSpike) is not computed.

    # Sharp spike
    store(('sharp_spikes', eeg_et.shortSpikeNum(eegData, minNumSamples)[:, 0]))

    # Number of Bursts
    store(('num_burst', eeg_et.numBursts(eegData, fs)[:, 0]))

    # Burst length mean and standard deviation
    burstLenMean_res, burstLenStd_res = eeg_et.burstLengthStats(eegData, fs)
    store(('burst_length_mean', burstLenMean_res[:, 0]),
          ('burst_length_std', burstLenStd_res[:, 0]))

    # Number of Suppressions
    store(('supressions', eeg_et.numSuppressions(eegData, fs)[:, 0]))

    # Suppression length mean and standard deviation
    suppLenMean_res, suppLenStd_res = eeg_et.suppressionLengthStats(eegData, fs)
    store(('supressions_length_mean', suppLenMean_res[:, 0]),
          ('supressions_length_std', suppLenStd_res[:, 0]))

    # Connectivity feature: coherence (delta). It was all 1 on the example.
    store(('coherence', eeg_et.coherence(eegData, fs)[:, 0]))

    return result

In [29]:
# fe2 = eeg_feature_2(np.array(example))
# len(fe2)

In [30]:
# fe2

In [31]:
# fe_dict = dict(fe1,**fe2)
# len(fe_dict.values())
# 1296

# 1296/16 = 81

In [32]:
# fe_dict.values()

Before we compare the feature difference, check some 0,1 values

In [33]:
# alphabet_2 = alphabet_vision[2] 
# alphabet_2
# # (0, (3064, 3424), 'vision', 'alphabet', 6, 'F')

# example_2 = alphabet_list[0].iloc[3786:4206,0:16]
# # example_2

In [34]:
# example

In [35]:
def generate_feature_dict(data):
    """Return the combined features of ``eeg_feature`` and ``eeg_feature_2``.

    Each extractor receives its own ``np.array(data)`` conversion. If both
    produce the same key, the value from ``eeg_feature_2`` is kept.
    """
    feature_dict = {}
    feature_dict.update(eeg_feature(np.array(data)))
    feature_dict.update(eeg_feature_2(np.array(data)))
    return feature_dict

In [36]:
# len(example_2)
# 420

In [37]:
# fe_dict_2 = generate_feature_dict(np.array(example))

Write all those functions in tools.py

In [38]:
from pipelines.tools import generate_feature_dict

In [39]:
# fe_dict_2 = generate_feature_dict(np.array(example_2))
# fe_dict_2['']

In [40]:
# fe_dict_1 = generate_feature_dict(np.array(example))

Writing all features to a dataframe and then to the csv files

So, first write the features into 4 DataFrames, then delete the features whose values never change (they are the same for every trial).

(this code takes a long time to execute)

In [41]:
# Collect one list per column (label, label_index, then every feature) for the
# imagination trials of the alphabet task.
aat_img = dict()

count = 0
for item in alphabet_imagination:
    # item[0] selects the recording; item[1] holds the first and last row (inclusive).
    first_row, last_row = item[1][0], item[1][-1]
    current = alphabet_list[item[0]].iloc[first_row:last_row + 1, 0:16]

    current_feature_dict = generate_feature_dict(np.array(current))

    # item[-1] is the label and item[-2] its index; features follow in dict order.
    row = [('label', item[-1]), ('label_index', item[-2])]
    row.extend(current_feature_dict.items())

    for column, value in row:
        if count == 0:
            aat_img[column] = [value]
        else:
            aat_img[column].append(value)

    count += 1


In [42]:
# Rebinds aat_img from the dict of column lists to a DataFrame (one row per trial).
aat_img = pd.DataFrame(aat_img)
aat_img.head()

Unnamed: 0,label,label_index,ch1_min,ch1_max,ch1_std,ch1_mean,ch1_coefficient_variation,ch1_mean_abs,ch1_AAC,ch1_CARD,...,ch7_coherence,ch8_coherence,ch9_coherence,ch10_coherence,ch11_coherence,ch12_coherence,ch13_coherence,ch14_coherence,ch15_coherence,ch16_coherence
0,I,9,-3464.721556,-3318.384685,34.719767,-3372.67479,-0.010294,3372.67479,7.005015,403.0,...,0.474576,0.732249,0.464366,0.634774,0.689424,0.396397,0.636329,0.598004,0.574852,0.625444
1,F,6,-3497.288048,-3331.281642,45.765203,-3394.721899,-0.013481,3394.721899,6.749174,351.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,Q,17,-3790.073549,-3653.392631,30.437155,-3703.890051,-0.008218,3703.890051,6.333221,348.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,D,4,-3810.950078,-3683.321617,35.116086,-3737.206895,-0.009396,3737.206895,6.486997,404.0,...,0.558202,0.591124,0.658237,0.595267,0.661488,0.575933,0.565323,0.714385,0.763491,0.846974
4,V,22,-3902.100492,-3756.836505,36.499483,-3811.941937,-0.009575,3811.941937,6.292202,345.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [49]:
# Row count, column count and dtypes of the alphabet/imagination feature table.
aat_img.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 832 entries, 0 to 831
Columns: 1298 entries, label to ch16_coherence
dtypes: float64(1280), int32(16), int64(1), object(1)
memory usage: 8.2+ MB


In [44]:
# Collect one list per column (label, label_index, then every feature) for the
# vision trials of the alphabet task.
aat_vision = dict()

count = 0
for item in alphabet_vision:
    # item[0] selects the recording; item[1] holds the first and last row (inclusive).
    first_row, last_row = item[1][0], item[1][-1]
    current = alphabet_list[item[0]].iloc[first_row:last_row + 1, 0:16]

    current_feature_dict = generate_feature_dict(np.array(current))

    # item[-1] is the label and item[-2] its index; features follow in dict order.
    row = [('label', item[-1]), ('label_index', item[-2])]
    row.extend(current_feature_dict.items())

    for column, value in row:
        if count == 0:
            aat_vision[column] = [value]
        else:
            aat_vision[column].append(value)

    count += 1


In [45]:
# Rebinds aat_vision from the dict of column lists to a DataFrame (one row per trial).
aat_vision = pd.DataFrame(aat_vision)
aat_vision.head()

Unnamed: 0,label,label_index,ch1_min,ch1_max,ch1_std,ch1_mean,ch1_coefficient_variation,ch1_mean_abs,ch1_AAC,ch1_CARD,...,ch7_coherence,ch8_coherence,ch9_coherence,ch10_coherence,ch11_coherence,ch12_coherence,ch13_coherence,ch14_coherence,ch15_coherence,ch16_coherence
0,I,9,-3411.345591,-3262.952359,31.36744,-3313.606131,-0.009466,3313.606131,6.992184,342.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,F,6,-3454.283292,-3334.187369,22.167027,-3374.728294,-0.006569,3374.728294,6.986747,332.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,Q,17,-3772.192153,-3341.965776,75.384702,-3591.181606,-0.020992,3591.181606,6.906636,413.0,...,0.508136,0.509386,0.660148,0.678768,0.437121,0.62361,0.630649,0.591844,0.544863,0.530359
3,D,4,-3786.139642,-3662.22157,21.2288,-3707.48732,-0.005726,3707.48732,6.090696,334.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,V,22,-3847.428125,-3682.516954,32.71298,-3751.086843,-0.008721,3751.086843,6.442441,347.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [50]:
# Row count, column count and dtypes of the alphabet/vision feature table.
aat_vision.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 832 entries, 0 to 831
Columns: 1298 entries, label to ch16_coherence
dtypes: float64(1280), int32(16), int64(1), object(1)
memory usage: 8.2+ MB


In [46]:
# Collect one list per column (label, label_index, then every feature) for the
# vision trials of the ASL task.
asl_vision_dict = dict()

count = 0
for item in asl_vision:
    # item[0] selects the recording; item[1] holds the first and last row (inclusive).
    first_row, last_row = item[1][0], item[1][-1]
    current = asl_list[item[0]].iloc[first_row:last_row + 1, 0:16]

    current_feature_dict = generate_feature_dict(np.array(current))

    # item[-1] is the label and item[-2] its index; features follow in dict order.
    row = [('label', item[-1]), ('label_index', item[-2])]
    row.extend(current_feature_dict.items())

    for column, value in row:
        if count == 0:
            asl_vision_dict[column] = [value]
        else:
            asl_vision_dict[column].append(value)

    count += 1


In [47]:
# Rebinds asl_vision_dict from the dict of column lists to a DataFrame;
# despite its name it is a DataFrame from here on.
asl_vision_dict = pd.DataFrame(asl_vision_dict)
asl_vision_dict.head()

Unnamed: 0,label,label_index,ch1_min,ch1_max,ch1_std,ch1_mean,ch1_coefficient_variation,ch1_mean_abs,ch1_AAC,ch1_CARD,...,ch7_coherence,ch8_coherence,ch9_coherence,ch10_coherence,ch11_coherence,ch12_coherence,ch13_coherence,ch14_coherence,ch15_coherence,ch16_coherence
0,I,9,-4050.292558,-3866.404756,33.093499,-3930.450824,-0.00842,3930.450824,7.109159,338.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,F,6,-4186.369978,-3973.626074,50.269552,-4030.724379,-0.012472,4030.724379,6.776665,341.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,Q,17,-4136.704402,-4000.872851,22.363986,-4041.348081,-0.005534,4041.348081,6.710153,379.0,...,0.578976,0.640656,0.603461,0.419225,0.583453,0.739717,0.623981,0.684624,0.549145,0.58367
3,D,4,-4237.041382,-4044.458752,38.720702,-4094.307705,-0.009457,4094.307705,7.362214,347.0,...,0.587184,0.467851,0.670944,0.59836,0.592328,0.687602,0.701118,0.538246,0.658707,0.529035
4,V,22,-4209.951068,-4103.601468,19.130984,-4128.230552,-0.004634,4128.230552,7.226734,319.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [51]:
# Row count, column count and dtypes of the ASL/vision feature table.
asl_vision_dict.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 832 entries, 0 to 831
Columns: 1298 entries, label to ch16_coherence
dtypes: float64(1280), int32(16), int64(1), object(1)
memory usage: 8.2+ MB


In [52]:
# Collect one list per column (label, label_index, then every feature) for the
# imagination trials of the ASL task.
asl_img = dict()

count = 0
for item in asl_imagination:
    # item[0] selects the recording; item[1] holds the first and last row (inclusive).
    first_row, last_row = item[1][0], item[1][-1]
    current = asl_list[item[0]].iloc[first_row:last_row + 1, 0:16]

    current_feature_dict = generate_feature_dict(np.array(current))

    # item[-1] is the label and item[-2] its index; features follow in dict order.
    row = [('label', item[-1]), ('label_index', item[-2])]
    row.extend(current_feature_dict.items())

    for column, value in row:
        if count == 0:
            asl_img[column] = [value]
        else:
            asl_img[column].append(value)

    count += 1


In [53]:
# Rebinds asl_img from the dict of column lists to a DataFrame (one row per trial).
asl_img = pd.DataFrame(asl_img)
asl_img.head()

Unnamed: 0,label,label_index,ch1_min,ch1_max,ch1_std,ch1_mean,ch1_coefficient_variation,ch1_mean_abs,ch1_AAC,ch1_CARD,...,ch7_coherence,ch8_coherence,ch9_coherence,ch10_coherence,ch11_coherence,ch12_coherence,ch13_coherence,ch14_coherence,ch15_coherence,ch16_coherence
0,I,9,-4115.336134,-3938.153856,34.241139,-3986.794278,-0.008589,3986.794278,7.532803,399.0,...,0.635704,0.517993,0.671713,0.71116,0.496245,0.473008,0.577944,0.592671,0.533941,0.631285
1,F,6,-4174.545905,-4041.128342,25.232193,-4073.135793,-0.006195,4073.135793,6.99139,325.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,Q,17,-4168.779155,-4019.894185,33.204783,-4071.318429,-0.008156,4071.318429,7.110579,333.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,D,4,-4201.233888,-4049.264377,29.108179,-4095.197891,-0.007108,4095.197891,7.200657,371.0,...,0.768519,0.706395,0.817993,0.689147,0.795852,0.837017,0.669407,0.595486,0.610734,0.686829
4,V,22,-4294.060683,-4121.885195,35.754917,-4186.625657,-0.00854,4186.625657,7.370053,345.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


### Delete no_changing features and store 4 DF 

Now that we have our 4 DataFrames, delete the useless columns and store the tables in CSV files


In [54]:
# Columns of the alphabet/imagination table that hold a single distinct value.
b1_aat_img = [
    column
    for column in aat_img.columns
    if len(set(aat_img[column].tolist())) == 1
]


In [55]:
# Columns of the alphabet/vision table that hold a single distinct value.
b1_aat_vision = [
    column
    for column in aat_vision.columns
    if len(set(aat_vision[column].tolist())) == 1
]


In [56]:
# Columns of the ASL/imagination table that hold a single distinct value.
b1_asl_img = [
    column
    for column in asl_img.columns
    if len(set(asl_img[column].tolist())) == 1
]


In [57]:
# Columns of the ASL/vision table that hold a single distinct value.
b1_asl_vision = [
    column
    for column in asl_vision_dict.columns
    if len(set(asl_vision_dict[column].tolist())) == 1
]


find the intersection of four sets

In [58]:
# Columns that are constant in all four feature tables, followed by the size of
# the intersection and of each individual list.
other_constant_columns = (b1_aat_vision, b1_asl_img, b1_asl_vision)
r = list(set(b1_aat_img).intersection(*other_constant_columns))
print(len(r), *(len(columns) for columns in (b1_aat_img,) + other_constant_columns))

162 168 168 164 167


In [63]:
# for i in asl_vision_dict.columns:
#     if 'cepstrum_2' in i:
#         print('oui')
#     if 'mean_frequency' in i:
#         print('oui')

In [64]:
# List the columns that are constant in all four tables.
print(r)

['ch16_spikes', 'ch10_diffuse_slowing', 'ch13_diffuse_slowing', 'ch1_sharp_spikes', 'ch6_num_burst', 'ch11_supressions_length_std', 'ch5_supressions_length_std', 'ch2_spikes', 'ch8_supressions_length_mean', 'ch1_FNN', 'ch16_supressions', 'ch9_FNN', 'ch10_num_burst', 'ch3_sharp_spikes', 'ch6_supressions_length_std', 'ch15_num_burst', 'ch4_supressions_length_std', 'ch13_burst_length_mean', 'ch6_supressions', 'ch5_sharp_spikes', 'ch16_supressions_length_std', 'ch14_num_burst', 'ch14_diffuse_slowing', 'ch4_burst_length_std', 'ch13_spikes', 'ch16_FNN', 'ch7_spikes', 'ch7_num_zero_crossing', 'ch1_num_burst', 'ch9_spikes', 'ch5_FNN', 'ch10_supressions_length_mean', 'ch6_sharp_spikes', 'ch10_supressions_length_std', 'ch16_burst_length_mean', 'ch15_FNN', 'ch8_burst_length_mean', 'ch11_burst_length_std', 'ch1_diffuse_slowing', 'ch11_sharp_spikes', 'ch12_diffuse_slowing', 'ch9_burst_length_mean', 'ch10_burst_length_std', 'ch14_sharp_spikes', 'ch2_burst_length_std', 'ch11_supressions_length_mean',

In [66]:
# Remove the channel prefix ("ch1_" ... "ch16_") from every constant column
# name so that only the feature name is left.
channel_prefixes = ["ch" + str(number) + "_" for number in range(1, 17)]

unuse_feature_list = list()
for item in r:
    strr = item
    for channel_prefix in channel_prefixes:
        strr = strr.replace(channel_prefix, '')

    unuse_feature_list.append(strr)


In [68]:
# Distinct feature names among the constant columns.
set(unuse_feature_list)

{'FNN',
 'burst_length_mean',
 'burst_length_std',
 'diffuse_slowing',
 'num_burst',
 'num_zero_crossing',
 'sharp_spikes',
 'spikes',
 'supressions',
 'supressions_length_mean',
 'supressions_length_std'}

In [69]:
# Number of distinct constant feature names.
len(set(unuse_feature_list))

11

These features are not useful in our case, so we will delete 11*16=176 features

Check b=2 and b=3 features distribution

In [73]:
# Columns of the alphabet/imagination table with exactly two distinct values.
b2_aat_img = [
    column
    for column in aat_img.columns
    if len(set(aat_img[column].tolist())) == 2
]


In [77]:
# Show the two-valued columns found above.
b2_aat_img

['ch3_num_zero_crossing']

In [78]:
# NOTE(review): imports normally belong in the notebook's first cell; this one is
# kept here so the cell still runs on its own.
from collections import Counter

# One example: value counts of the first aat_img column with only two distinct values.
Counter(aat_img[b2_aat_img[0]])

Counter({0: 831, 27: 1})

In [79]:
# Collect the columns of aat_img that hold exactly three distinct values.
# DataFrame.nunique(dropna=False) counts distinct values per column in a single
# vectorized call and treats all NaNs as one value; len(set(column.tolist()))
# would count every NaN separately because NaN != NaN.
aat_img_distinct = aat_img.nunique(dropna=False)
b3_aat_img = aat_img_distinct[aat_img_distinct == 3].index.tolist()

# Value counts of the first such column, as an example.
Counter(aat_img[b3_aat_img[0]])

Counter({0: 830, 38: 1, 145: 1})

In [82]:
# Collect the columns of aat_vision that hold exactly two distinct values.
# DataFrame.nunique(dropna=False) counts distinct values per column in a single
# vectorized call and treats all NaNs as one value; len(set(column.tolist()))
# would count every NaN separately because NaN != NaN.
aat_vision_distinct = aat_vision.nunique(dropna=False)
b2_aat_vision = aat_vision_distinct[aat_vision_distinct == 2].index.tolist()
print(b2_aat_vision)

# Value counts of the second such column, as an example.
Counter(aat_vision[b2_aat_vision[1]])

['ch11_num_zero_crossing', 'ch13_num_zero_crossing']


Counter({0: 831, 1: 1})

Stop here: delete the 176 features, then store the DataFrames

In [84]:
# Rebuild the full column names to drop: every unused feature type combined with
# every channel prefix "ch1_" .. "ch16_".
ch_prefix = ["ch" + str(n) + "_" for n in range(1, 17)]

delete_index = [
    prefix + feature
    for feature in set(unuse_feature_list)
    for prefix in ch_prefix
]

len(delete_index)

176

In [85]:
# Drop the columns listed in delete_index from each feature table.
# DataFrame.drop (without inplace) already returns a new frame, so the source
# frames are left untouched.
# NOTE(review): asl_vision_dict is used like a DataFrame here despite its name — confirm.
df1, df2, df3, df4 = (
    frame.drop(columns=delete_index)
    for frame in (aat_img, aat_vision, asl_img, asl_vision_dict)
)

In [86]:
# Preview the first rows of df1 (aat_img with the delete_index columns dropped).
df1.head()

Unnamed: 0,label,label_index,ch1_min,ch1_max,ch1_std,ch1_mean,ch1_coefficient_variation,ch1_mean_abs,ch1_AAC,ch1_CARD,...,ch7_coherence,ch8_coherence,ch9_coherence,ch10_coherence,ch11_coherence,ch12_coherence,ch13_coherence,ch14_coherence,ch15_coherence,ch16_coherence
0,I,9,-3464.721556,-3318.384685,34.719767,-3372.67479,-0.010294,3372.67479,7.005015,403.0,...,0.474576,0.732249,0.464366,0.634774,0.689424,0.396397,0.636329,0.598004,0.574852,0.625444
1,F,6,-3497.288048,-3331.281642,45.765203,-3394.721899,-0.013481,3394.721899,6.749174,351.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,Q,17,-3790.073549,-3653.392631,30.437155,-3703.890051,-0.008218,3703.890051,6.333221,348.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,D,4,-3810.950078,-3683.321617,35.116086,-3737.206895,-0.009396,3737.206895,6.486997,404.0,...,0.558202,0.591124,0.658237,0.595267,0.661488,0.575933,0.565323,0.714385,0.763491,0.846974
4,V,22,-3902.100492,-3756.836505,36.499483,-3811.941937,-0.009575,3811.941937,6.292202,345.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [87]:
# Persist the four reduced feature tables as CSV files under
# <feature_path>eeg_features/.
eeg_feature_dir = feature_path + "eeg_features/"

# DataFrame.to_csv does not create missing directories, so make sure the target
# folder exists before writing (no-op when it is already there).
os.makedirs(eeg_feature_dir, exist_ok=True)

for csv_name, frame in (
    ("aat_img.csv", df1),
    ("aat_vision.csv", df2),
    ("asl_img.csv", df3),
    ("asl_vision.csv", df4),
):
    frame.to_csv(eeg_feature_dir + csv_name)

From now on we can read these CSV files instead of recomputing the features above

In [88]:
def compute_two_features():
    """Placeholder for a duplicate-feature comparison; intentionally does nothing.

    Comparing features to find duplicates proved difficult, so this check is
    skipped. Duplicates will be checked manually while writing the feature
    description and sorting the functions in eeg_feature.py.

    Returns:
        None
    """
    return None

## Annex - EEG feature description


A preliminary rough feature type classification

* name/abbreviation, description, corresponding function

Note that not all of these features are in our DataFrame

### raw features, statistical features

### Higher-order statistical features, signal features

### energy, entropy

### Some features in the frequency domain