In [1]:
import pandas as pd
import numpy as np
from scipy import signal
import seaborn as sns
import matplotlib.pyplot as plt
import pickle

import warnings
warnings.filterwarnings('ignore')

In [14]:
data = pd.read_csv('data_filtered.csv')

In [3]:
data.drop(['Unnamed: 0'],axis=1,inplace=True)

In [4]:
data.columns

Index(['crew', 'experiment', 'time', 'seat', 'event', 'ecg_filtered',
       'r_filtered', 'gsr_filtered', 'eeg_fp1_filtered', 'eeg_f7_filtered',
       'eeg_f8_filtered', 'eeg_t4_filtered', 'eeg_t6_filtered',
       'eeg_t5_filtered', 'eeg_t3_filtered', 'eeg_fp2_filtered',
       'eeg_o1_filtered', 'eeg_p3_filtered', 'eeg_pz_filtered',
       'eeg_f3_filtered', 'eeg_fz_filtered', 'eeg_f4_filtered',
       'eeg_c4_filtered', 'eeg_p4_filtered', 'eeg_poz_filtered',
       'eeg_c3_filtered', 'eeg_cz_filtered', 'eeg_o2_filtered'],
      dtype='object')

In [5]:
data = data[['crew', 'experiment', 'time', 'seat', 'eeg_fp1_filtered', 'eeg_f7_filtered',
       'eeg_f8_filtered', 'eeg_t4_filtered', 'eeg_t6_filtered',
       'eeg_t5_filtered', 'eeg_t3_filtered', 'eeg_fp2_filtered',
       'eeg_o1_filtered', 'eeg_p3_filtered', 'eeg_pz_filtered',
       'eeg_f3_filtered', 'eeg_fz_filtered', 'eeg_f4_filtered',
       'eeg_c4_filtered', 'eeg_p4_filtered', 'eeg_poz_filtered',
       'eeg_c3_filtered', 'eeg_cz_filtered', 'eeg_o2_filtered','ecg_filtered',
       'r_filtered', 'gsr_filtered', 'event']]

In [176]:
data.head(10)

Unnamed: 0,crew,experiment,time,seat,eeg_fp1_filtered,eeg_f7_filtered,eeg_f8_filtered,eeg_t4_filtered,eeg_t6_filtered,eeg_t5_filtered,...,fp1-f3-c3-p3-o1,fp1-f7-t3-t5-o1,fp2-f4-c4-p4-o2,fp2-f8-t4-t6-o2,fz-cz-pz,fp1-fp2,f7-f3-fz-f4-f8,t3-c3-cz-c4-t4,t5-p3-pz-p4-t6,o1-o2
0,1,CA,0.011719,1,-5.288495,26.773097,-9.568092,-12.833711,16.713328,33.727615,...,-98.405316,-118.731929,-79.223422,-15.538558,1.107651,1.413723,13.729246,-17.556617,-29.194659,14.708126
1,1,CA,0.015625,1,-2.330711,28.903775,-8.036635,-2.593392,15.820016,30.570086,...,-95.90832,-110.172442,-77.67174,-29.373686,-4.304937,3.909933,6.281643,-25.898227,-34.541336,8.65146
2,1,CA,0.019531,1,10.665265,29.642771,13.226779,22.742471,16.364637,31.926765,...,-88.296417,-103.210797,-49.421793,-74.615432,-5.013028,11.041311,-23.6475,-16.639035,-38.74938,6.294106
3,1,CA,0.023438,1,11.105943,26.41916,4.765931,14.742303,20.37046,31.755383,...,-84.993426,-98.324963,-64.851507,-59.464155,-4.591486,11.29259,-17.333312,-27.619387,-45.456707,11.16271
4,1,CA,0.027344,1,8.163645,25.381885,-1.845156,3.522284,19.811691,28.493136,...,-85.998235,-96.673621,-67.998166,-45.484599,-6.2875,9.734131,-6.698123,-18.87291,-50.72873,9.377495
5,1,CA,0.03125,1,4.676165,23.254642,9.892653,16.187784,16.331599,27.150947,...,-87.15891,-101.349656,-46.806791,-74.4761,-7.44501,9.941806,-15.209341,0.388761,-49.4009,6.954744
6,1,CA,0.035156,1,0.264468,21.302926,3.250808,12.533019,17.608968,29.211403,...,-87.745678,-105.669143,-50.217523,-59.77977,-9.15915,5.669019,-7.196918,-10.195781,-46.805312,13.059889
7,1,CA,0.039062,1,0.089779,17.755873,-9.143255,-0.403446,23.271489,32.969148,...,-91.512292,-104.029213,-75.934362,-43.493934,-5.622442,5.904461,1.152835,-18.384316,-56.551787,10.037031
8,1,CA,0.042969,1,-6.973087,12.910128,-8.71847,1.010149,27.920459,35.090765,...,-103.354226,-113.564362,-87.907745,-65.449151,-0.633509,6.779119,-2.078454,-5.553561,-67.809414,4.950527
9,1,CA,0.046875,1,-11.022851,17.736952,-1.158819,4.407578,26.357972,30.776491,...,-99.361858,-116.392491,-60.453575,-59.745785,-3.081961,-2.570957,-4.962847,0.684873,-61.61578,10.17686


## Creating EEG features

In [10]:
def create_eeg_traces(data):
    
    # Vertical traces of the eeg signals for left side of the brain.

    data['fp1-f3'] = data.apply(lambda row: np.subtract(row['eeg_fp1_filtered'],row['eeg_f3_filtered']),axis=1)

    data['fp1-f3-c3'] = data.apply(lambda row: np.subtract(row['fp1-f3'],row['eeg_c3_filtered']),axis=1)

    data['fp1-f3-c3-p3'] = data.apply(lambda row: np.subtract(row['fp1-f3-c3'],row['eeg_p3_filtered']),axis=1)

    data['fp1-f3-c3-p3-o1'] = data.apply(lambda row: np.subtract(row['fp1-f3-c3-p3'],row['eeg_o1_filtered']),axis=1)

    data.drop(['fp1-f3','fp1-f3-c3','fp1-f3-c3-p3'],axis=1,inplace=True)


    data['fp1-f7'] = data.apply(lambda row: np.subtract(row['eeg_fp1_filtered'],row['eeg_f7_filtered']),axis=1)

    data['fp1-f7-t3'] = data.apply(lambda row: np.subtract(row['fp1-f7'],row['eeg_t3_filtered']),axis=1)

    data['fp1-f7-t3-t5'] = data.apply(lambda row: np.subtract(row['fp1-f7-t3'],row['eeg_t5_filtered']),axis=1)

    data['fp1-f7-t3-t5-o1'] = data.apply(lambda row: np.subtract(row['fp1-f7-t3-t5'],row['eeg_o1_filtered']),axis=1)

    data.drop(['fp1-f7','fp1-f7-t3','fp1-f7-t3-t5'],axis=1,inplace=True)



    # Vertical traces of the eeg signals for right side of the brain.

    data['fp2-f4'] = data.apply(lambda row: np.subtract(row['eeg_fp2_filtered'],row['eeg_f4_filtered']),axis=1)

    data['fp2-f4-c4'] = data.apply(lambda row: np.subtract(row['fp2-f4'],row['eeg_c4_filtered']),axis=1)

    data['fp2-f4-c4-p4'] = data.apply(lambda row: np.subtract(row['fp2-f4-c4'],row['eeg_p4_filtered']),axis=1)

    data['fp2-f4-c4-p4-o2'] = data.apply(lambda row: np.subtract(row['fp2-f4-c4-p4'],row['eeg_o2_filtered']),axis=1)

    data.drop(['fp2-f4','fp2-f4-c4','fp2-f4-c4-p4'],axis=1,inplace=True)


    data['fp2-f8'] = data.apply(lambda row: np.subtract(row['eeg_fp2_filtered'],row['eeg_f8_filtered']),axis=1)

    data['fp2-f8-t4'] = data.apply(lambda row: np.subtract(row['fp2-f8'],row['eeg_t4_filtered']),axis=1)

    data['fp2-f8-t4-t6'] = data.apply(lambda row: np.subtract(row['fp2-f8-t4'],row['eeg_t6_filtered']),axis=1)

    data['fp2-f8-t4-t6-o2'] = data.apply(lambda row: np.subtract(row['fp2-f8-t4-t6'],row['eeg_o2_filtered']),axis=1)

    data.drop(['fp2-f8','fp2-f8-t4','fp2-f8-t4-t6'],axis=1,inplace=True)
    
    
    # Central traces of the eeg signals

    data['fz-cz'] = data.apply(lambda row: np.subtract(row['eeg_fz_filtered'],row['eeg_cz_filtered']),axis=1)

    data['fz-cz-pz'] = data.apply(lambda row: np.subtract(row['fz-cz'],row['eeg_pz_filtered']),axis=1)

    data.drop(['fz-cz'],axis=1,inplace=True)


    # Horizontal traces of the eeg signals.

    data['fp1-fp2'] = data.apply(lambda row: np.subtract(row['eeg_fp1_filtered'],row['eeg_fp2_filtered']),axis=1)


    data['f7-f3'] = data.apply(lambda row: np.subtract(row['eeg_f7_filtered'],row['eeg_f3_filtered']),axis=1)

    data['f7-f3-fz'] = data.apply(lambda row: np.subtract(row['f7-f3'],row['eeg_fz_filtered']),axis=1)

    data['f7-f3-fz-f4'] = data.apply(lambda row: np.subtract(row['f7-f3-fz'],row['eeg_f4_filtered']),axis=1)

    data['f7-f3-fz-f4-f8'] = data.apply(lambda row: np.subtract(row['f7-f3-fz-f4'],row['eeg_f8_filtered']),axis=1)

    data.drop(['f7-f3','f7-f3-fz','f7-f3-fz-f4'],axis=1,inplace=True)


    data['t3-c3'] = data.apply(lambda row: np.subtract(row['eeg_t3_filtered'],row['eeg_c3_filtered']),axis=1)

    data['t3-c3-cz'] = data.apply(lambda row: np.subtract(row['t3-c3'],row['eeg_cz_filtered']),axis=1)

    data['t3-c3-cz-c4'] = data.apply(lambda row: np.subtract(row['t3-c3-cz'],row['eeg_c4_filtered']),axis=1)

    data['t3-c3-cz-c4-t4'] = data.apply(lambda row: np.subtract(row['t3-c3-cz-c4'],row['eeg_t4_filtered']),axis=1)

    data.drop(['t3-c3','t3-c3-cz','t3-c3-cz-c4'],axis=1,inplace=True)


    data['t5-p3'] = data.apply(lambda row: np.subtract(row['eeg_t5_filtered'],row['eeg_p3_filtered']),axis=1)

    data['t5-p3-pz'] = data.apply(lambda row: np.subtract(row['t5-p3'],row['eeg_pz_filtered']),axis=1)

    data['t5-p3-pz-p4'] = data.apply(lambda row: np.subtract(row['t5-p3-pz'],row['eeg_p4_filtered']),axis=1)

    data['t5-p3-pz-p4-t6'] = data.apply(lambda row: np.subtract(row['t5-p3-pz-p4'],row['eeg_t6_filtered']),axis=1)

    data.drop(['t5-p3','t5-p3-pz','t5-p3-pz-p4'],axis=1,inplace=True)


    data['o1-o2'] = data.apply(lambda row: np.subtract(row['eeg_o1_filtered'],row['eeg_o2_filtered']),axis=1)
    
    return data

In [7]:
data = create_eeg_traces(data)

In [10]:
pickle.dump(data, open('data_eegfeatures', 'wb'))

## Moving Average ECG

In [2]:
def cal_moving_average(arr):
    
    ls = []
    
    for i in range(0,len(arr)):
        
        if i > 8 :
            
            ecg_vals = [arr[i] , arr[i-1] , arr[i-2], arr[i-3] , arr[i-4] , arr[i-5],arr[i-6], arr[i-7], arr[i-8], arr[i-9]]
            
            mean = np.mean(ecg_vals)
            
            ls.append(mean)
            
        else:
            
            ls.append(0)
            
    return ls
        
        

In [58]:
def ecg_moving_average(dt,experiment):
    
    ecg_feature = 'ecg_moving_avg'
    
    features = list(dt.columns)
    
    features.append(ecg_feature)
    
    data_crew = pd.DataFrame(columns=features) # creating empty dataframe
    
    crew_id = dict(dt.crew.value_counts())
    
    
    for key, value in crew_id.items():

        data_crew_pilot1 = dt[(dt.crew == key) & (dt.seat == 1) & (dt.experiment == experiment)]
        
        ecg_data_pilot1 = np.asarray(data_crew_pilot1.ecg_filtered)
        
        ecg_mean_pilot1 = cal_moving_average(ecg_data_pilot1)
        
        data_crew_pilot1[ecg_feature] = ecg_mean_pilot1
        
        
        data_crew_pilot2 = dt[ (dt.crew == key) & (dt.seat == 0) & (dt.experiment == experiment)]
        
        ecg_data_pilot2 = np.asarray(data_crew_pilot2.ecg_filtered)
        
        ecg_mean_pilot2 = cal_moving_average(ecg_data_pilot2)
        
        data_crew_pilot2[ecg_feature] = ecg_mean_pilot2
       
        
        data_crew = pd.concat([data_crew,data_crew_pilot1,data_crew_pilot2])
    
    
    return data_crew  
    

In [231]:
data_CA_ecg = ecg_moving_average(data,'CA')
data_DA_ecg = ecg_moving_average(data,'DA')
data_SS_ecg = ecg_moving_average(data,'SS')

In [180]:
data_with_ecg_avg = pd.concat([data_CA_ecg,data_DA_ecg,data_SS_ecg])

In [181]:
data_with_ecg_avg.shape

(4867421, 39)

In [182]:
pickle.dump(data_with_ecg_avg, open('data_ecg_avg', 'wb'))

## Moving Average R

In [59]:
def r_moving_average(dt,experiment):
    
    r_feature = 'r_moving_average'
    
    features = list(dt.columns)
    
    features.append(r_feature)
    
    data_crew = pd.DataFrame(columns=features) # creating empty dataframe
    
    crew_id = dict(dt.crew.value_counts())
    
    
    for key, value in crew_id.items():

        data_crew_pilot1 = dt[(dt.crew == key) & (dt.seat == 1) & (dt.experiment == experiment)]
        
        r_data_pilot1 = np.asarray(data_crew_pilot1.r_filtered)
        
        r_mean_pilot1 = cal_moving_average(r_data_pilot1)
        
        data_crew_pilot1[r_feature] = r_mean_pilot1
        
        
        data_crew_pilot2 = dt[ (dt.crew == key) & (dt.seat == 0) & (dt.experiment == experiment)]
        
        r_data_pilot2 = np.asarray(data_crew_pilot2.r_filtered)
        
        r_mean_pilot2 = cal_moving_average(r_data_pilot2)
        
        data_crew_pilot2[r_feature] = r_mean_pilot2
        
        
        data_crew = pd.concat([data_crew,data_crew_pilot1,data_crew_pilot2])
    
    
    return data_crew  

In [232]:
data_CA_r = r_moving_average(data_with_ecg_avg,'CA')
data_DA_r = r_moving_average(data_with_ecg_avg,'DA')
data_SS_r = r_moving_average(data_with_ecg_avg,'SS')

In [188]:
data_with_r_avg = pd.concat([data_CA_r,data_DA_r,data_SS_r])

In [189]:
data_with_r_avg.shape

(4867421, 40)

## Moving Average GSR

In [60]:
def gsr_moving_average(dt,experiment):
    
    gsr_feature = 'gsr_moving_average'
    
    features = list(dt.columns)
    
    features.append(gsr_feature)
    
    data_crew = pd.DataFrame(columns=features) # creating empty dataframe
    
    crew_id = dict(dt.crew.value_counts())
    
    
    for key, value in crew_id.items():

        data_crew_pilot1 = dt[(dt.crew == key) & (dt.seat == 1) & (dt.experiment == experiment)]
        
        gsr_data_pilot1 = np.asarray(data_crew_pilot1.gsr_filtered)
        
        gsr_mean_pilot1 = cal_moving_average(gsr_data_pilot1)
        
        data_crew_pilot1[gsr_feature] = gsr_mean_pilot1
        
        
        data_crew_pilot2 = dt[ (dt.crew == key) & (dt.seat == 0) & (dt.experiment == experiment)]
        
        gsr_data_pilot2 = np.asarray(data_crew_pilot2.gsr_filtered)
        
        gsr_mean_pilot2 = cal_moving_average(gsr_data_pilot2)
        
        data_crew_pilot2[gsr_feature] = gsr_mean_pilot2
        
        
        data_crew = pd.concat([data_crew,data_crew_pilot1,data_crew_pilot2])
    
    
    return data_crew  

In [233]:
data_CA_gsr = gsr_moving_average(data_with_r_avg,'CA')
data_DA_gsr = gsr_moving_average(data_with_r_avg,'DA')
data_SS_gsr = gsr_moving_average(data_with_r_avg,'SS')

In [192]:
data_with_gsr_avg = pd.concat([data_CA_gsr,data_DA_gsr,data_SS_gsr])

In [193]:
pickle.dump(data_with_gsr_avg, open('data_moving_avg', 'wb'))

In [194]:
data_with_gsr_avg.shape

(4867421, 41)

In [8]:
train_mvg_avg = pickle.load(open('data_moving_avg', 'rb'))

In [9]:
train_mvg_avg.head()


Unnamed: 0,crew,experiment,time,seat,eeg_fp1_filtered,eeg_f7_filtered,eeg_f8_filtered,eeg_t4_filtered,eeg_t6_filtered,eeg_t5_filtered,...,fp2-f8-t4-t6-o2,fz-cz-pz,fp1-fp2,f7-f3-fz-f4-f8,t3-c3-cz-c4-t4,t5-p3-pz-p4-t6,o1-o2,gsr_moving_average,r_moving_average,ecg_rolling_mean
2659039,6,CA,0.128906,1,34.830877,22.90855,25.564391,29.426666,20.11863,-6.088825,...,-64.407324,-16.0027,-1.043905,-81.459668,-91.654414,-86.091499,-8.580638,0.0,0.0,0.0
2659041,6,CA,0.132812,1,22.967247,14.99973,15.312251,10.60462,21.998748,-1.467722,...,-37.430251,-5.37567,-3.498153,-51.338706,-45.864578,-58.245917,-0.635971,0.0,0.0,0.0
2659043,6,CA,0.136719,1,19.229382,9.951108,13.334103,-9.068331,-0.138732,5.313678,...,14.599539,1.190363,-3.869519,-42.028909,-13.436717,-12.167913,-1.106442,0.0,0.0,0.0
2659045,6,CA,0.140625,1,18.909279,8.763193,7.350886,9.741196,-6.847633,3.27927,...,6.724231,-0.046208,-1.329692,-29.534721,-26.042864,-9.75384,0.436798,0.0,0.0,0.0
2659047,6,CA,0.144531,1,18.000405,11.382915,7.425218,11.357651,18.420701,2.090205,...,-26.97827,-5.511672,-3.661745,-29.844228,-29.073529,-53.204257,12.061401,0.0,0.0,0.0


In [11]:
train_mvg_avg = train_mvg_avg.sort_index()

In [12]:
train_mvg_avg.head()

Unnamed: 0,crew,experiment,time,seat,eeg_fp1_filtered,eeg_f7_filtered,eeg_f8_filtered,eeg_t4_filtered,eeg_t6_filtered,eeg_t5_filtered,...,fp2-f8-t4-t6-o2,fz-cz-pz,fp1-fp2,f7-f3-fz-f4-f8,t3-c3-cz-c4-t4,t5-p3-pz-p4-t6,o1-o2,gsr_moving_average,r_moving_average,ecg_rolling_mean
0,1,CA,0.011719,1,-5.288495,26.773097,-9.568092,-12.833711,16.713328,33.727615,...,-15.538558,1.107651,1.413723,13.729246,-17.556617,-29.194659,14.708126,0.0,0.0,0.0
1,1,CA,0.015625,1,-2.330711,28.903775,-8.036635,-2.593392,15.820016,30.570086,...,-29.373686,-4.304937,3.909933,6.281643,-25.898227,-34.541336,8.65146,0.0,0.0,0.0
2,1,CA,0.019531,1,10.665265,29.642771,13.226779,22.742471,16.364637,31.926765,...,-74.615432,-5.013028,11.041311,-23.6475,-16.639035,-38.74938,6.294106,0.0,0.0,0.0
3,1,CA,0.023438,1,11.105943,26.41916,4.765931,14.742303,20.37046,31.755383,...,-59.464155,-4.591486,11.29259,-17.333312,-27.619387,-45.456707,11.16271,0.0,0.0,0.0
4,1,CA,0.027344,1,8.163645,25.381885,-1.845156,3.522284,19.811691,28.493136,...,-45.484599,-6.2875,9.734131,-6.698123,-18.87291,-50.72873,9.377495,0.0,0.0,0.0


## ECG peaks feature

In [70]:
def create_peak_booleans(peak,dt):
    ls = []
    for i in range(0,dt.shape[0]):
    
        if(i in peak):

            ls.append(1)
        else:

            ls.append(0)
    return ls

In [71]:
def signal_peak_ecg(dt,experiment):
    
    ecg_peak = 'has_ecg_peak'
    
    features = list(dt.columns)
    
    features.append(ecg_peak)
    
    data_crew = pd.DataFrame(columns=features)
    
    crew_id = dict(dt.crew.value_counts())
    
    
    for key, value in crew_id.items():

        data_crew_pilot1 = dt[(dt.crew == key) & (dt.seat == 1) & (dt.experiment == experiment)]
        
        peaks,_ = signal.find_peaks(data_crew_pilot1['ecg_filtered'])
        
        ecg_peaks_pilot1 = create_peak_booleans(peaks,data_crew_pilot1)
        
        data_crew_pilot1[ecg_peak] = ecg_peaks_pilot1
        
        
        data_crew_pilot2 = dt[(dt.crew == key) & (dt.seat == 0) & (dt.experiment == experiment)]
        
        peaks,_ = signal.find_peaks(data_crew_pilot2['ecg_filtered'])
        
        ecg_peaks_pilot2 = create_peak_booleans(peaks,data_crew_pilot2)
        
        data_crew_pilot2[ecg_peak] = ecg_peaks_pilot2
       
        
        data_crew = pd.concat([data_crew,data_crew_pilot1,data_crew_pilot2])
    
    
    return data_crew  

In [20]:
data_CA_ecg_peaks = signal_peak_ecg(train_mvg_avg,'CA')
data_DA_ecg_peaks = signal_peak_ecg(train_mvg_avg,'DA')
data_SS_ecg_peaks = signal_peak_ecg(train_mvg_avg,'SS')

In [21]:
data_ecg_peaks = pd.concat([data_CA_ecg_peaks,data_DA_ecg_peaks,data_SS_ecg_peaks])

In [22]:
data_ecg_peaks.shape

(4867421, 42)

In [23]:
data_ecg_peaks.head()

Unnamed: 0,crew,experiment,time,seat,eeg_fp1_filtered,eeg_f7_filtered,eeg_f8_filtered,eeg_t4_filtered,eeg_t6_filtered,eeg_t5_filtered,...,fz-cz-pz,fp1-fp2,f7-f3-fz-f4-f8,t3-c3-cz-c4-t4,t5-p3-pz-p4-t6,o1-o2,gsr_moving_average,r_moving_average,ecg_rolling_mean,has_ecg_peak
2659039,6,CA,0.128906,1,34.830877,22.90855,25.564391,29.426666,20.11863,-6.088825,...,-16.0027,-1.043905,-81.459668,-91.654414,-86.091499,-8.580638,0.0,0.0,0.0,0
2659041,6,CA,0.132812,1,22.967247,14.99973,15.312251,10.60462,21.998748,-1.467722,...,-5.37567,-3.498153,-51.338706,-45.864578,-58.245917,-0.635971,0.0,0.0,0.0,0
2659043,6,CA,0.136719,1,19.229382,9.951108,13.334103,-9.068331,-0.138732,5.313678,...,1.190363,-3.869519,-42.028909,-13.436717,-12.167913,-1.106442,0.0,0.0,0.0,1
2659045,6,CA,0.140625,1,18.909279,8.763193,7.350886,9.741196,-6.847633,3.27927,...,-0.046208,-1.329692,-29.534721,-26.042864,-9.75384,0.436798,0.0,0.0,0.0,0
2659047,6,CA,0.144531,1,18.000405,11.382915,7.425218,11.357651,18.420701,2.090205,...,-5.511672,-3.661745,-29.844228,-29.073529,-53.204257,12.061401,0.0,0.0,0.0,0


## R peaks feature

In [72]:
def signal_peak_r(dt,experiment):
    
    r_peak = 'has_r_peak'
    
    features = list(dt.columns)
    
    features.append(r_peak)
    
    data_crew = pd.DataFrame(columns=features)
    
    crew_id = dict(dt.crew.value_counts())
    
    
    for key, value in crew_id.items():

        data_crew_pilot1 = dt[(dt.crew == key) & (dt.seat == 1) & (dt.experiment == experiment)]
        
        peaks,_ = signal.find_peaks(data_crew_pilot1['r_filtered'])
        
        r_peaks_pilot1 = create_peak_booleans(peaks,data_crew_pilot1)
        
        data_crew_pilot1[r_peak] = r_peaks_pilot1
        
        
        data_crew_pilot2 = dt[(dt.crew == key) & (dt.seat == 0) & (dt.experiment == experiment)]
        
        peaks,_ = signal.find_peaks(data_crew_pilot2['r_filtered'])
        
        r_peaks_pilot2 = create_peak_booleans(peaks,data_crew_pilot2)
        
        data_crew_pilot2[r_peak] = r_peaks_pilot2
       
        
        data_crew = pd.concat([data_crew,data_crew_pilot1,data_crew_pilot2])
    
    
    return data_crew  

In [25]:
data_CA_r_peaks = signal_peak_r(data_ecg_peaks,'CA')
data_DA_r_peaks = signal_peak_r(data_ecg_peaks,'DA')
data_SS_r_peaks = signal_peak_r(data_ecg_peaks,'SS')

In [26]:
data_r_peaks = pd.concat([data_CA_r_peaks,data_DA_r_peaks,data_SS_r_peaks])

In [225]:
data_r_peaks.shape

(4867421, 43)

## GSR peaks feature

In [73]:
def signal_peak_gsr(dt,experiment):
    
    gsr_peak = 'has_gsr_peak'
    
    features = list(dt.columns)
    
    features.append(gsr_peak)
    
    data_crew = pd.DataFrame(columns=features)
    
    crew_id = dict(dt.crew.value_counts())
    
    
    for key, value in crew_id.items():

        data_crew_pilot1 = dt[(dt.crew == key) & (dt.seat == 1) & (dt.experiment == experiment)]
        
        peaks,_ = signal.find_peaks(data_crew_pilot1['gsr_filtered'])
        
        gsr_peaks_pilot1 = create_peak_booleans(peaks,data_crew_pilot1)
        
        data_crew_pilot1[gsr_peak] = gsr_peaks_pilot1
        
        
        data_crew_pilot2 = dt[(dt.crew == key) & (dt.seat == 0) & (dt.experiment == experiment)]
        
        peaks,_ = signal.find_peaks(data_crew_pilot2['gsr_filtered'])
        
        gsr_peaks_pilot2 = create_peak_booleans(peaks,data_crew_pilot2)
        
        data_crew_pilot2[gsr_peak] = gsr_peaks_pilot2
       
        
        data_crew = pd.concat([data_crew,data_crew_pilot1,data_crew_pilot2])
    
    
    return data_crew  

In [28]:
data_CA_gsr_peaks = signal_peak_gsr(data_r_peaks,'CA')
data_DA_gsr_peaks = signal_peak_gsr(data_r_peaks,'DA')
data_SS_gsr_peaks = signal_peak_gsr(data_r_peaks,'SS')

In [29]:
data_featured_ = pd.concat([data_CA_gsr_peaks,data_DA_gsr_peaks,data_SS_gsr_peaks])

In [31]:
data_featured_ = data_featured_.sort_index()

In [32]:
data_featured_.head()

Unnamed: 0,crew,experiment,time,seat,eeg_fp1_filtered,eeg_f7_filtered,eeg_f8_filtered,eeg_t4_filtered,eeg_t6_filtered,eeg_t5_filtered,...,f7-f3-fz-f4-f8,t3-c3-cz-c4-t4,t5-p3-pz-p4-t6,o1-o2,gsr_moving_average,r_moving_average,ecg_rolling_mean,has_ecg_peak,has_r_peak,has_gsr_peak
0,1,CA,0.011719,1,-5.288495,26.773097,-9.568092,-12.833711,16.713328,33.727615,...,13.729246,-17.556617,-29.194659,14.708126,0.0,0.0,0.0,0,0,0
1,1,CA,0.015625,1,-2.330711,28.903775,-8.036635,-2.593392,15.820016,30.570086,...,6.281643,-25.898227,-34.541336,8.65146,0.0,0.0,0.0,0,0,0
2,1,CA,0.019531,1,10.665265,29.642771,13.226779,22.742471,16.364637,31.926765,...,-23.6475,-16.639035,-38.74938,6.294106,0.0,0.0,0.0,0,0,0
3,1,CA,0.023438,1,11.105943,26.41916,4.765931,14.742303,20.37046,31.755383,...,-17.333312,-27.619387,-45.456707,11.16271,0.0,0.0,0.0,0,0,0
4,1,CA,0.027344,1,8.163645,25.381885,-1.845156,3.522284,19.811691,28.493136,...,-6.698123,-18.87291,-50.72873,9.377495,0.0,0.0,0.0,0,0,0


## Data standardisation

In [23]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [34]:
data_temp = data_featured_.loc[:, ['crew','experiment','seat','has_r_peak', 'has_ecg_peak','has_gsr_peak','event']]
data_featured_.drop(['crew','experiment','seat','has_r_peak', 'has_ecg_peak','has_gsr_peak','event'],axis=1,inplace=True)

In [35]:
data_std = scaler.fit_transform(data_featured_)

data_featured_std = pd.DataFrame(data=data_std,columns=list(data_featured_.columns)) # Converting to dataframe

train_featured_std = pd.concat([data_temp,data_featured_std],axis=1)

In [38]:
train_featured_std.head()

Unnamed: 0,crew,experiment,seat,has_r_peak,has_ecg_peak,has_gsr_peak,event,time,eeg_fp1_filtered,eeg_f7_filtered,...,fp2-f8-t4-t6-o2,fz-cz-pz,fp1-fp2,f7-f3-fz-f4-f8,t3-c3-cz-c4-t4,t5-p3-pz-p4-t6,o1-o2,gsr_moving_average,r_moving_average,ecg_rolling_mean
0,1,CA,1,0,0,0,A,-1.714367,-0.263545,0.926641,...,-0.391034,0.01024,0.059664,0.210675,-0.356122,-0.442565,0.765588,-2.361624,-27.293017,-8e-06
1,1,CA,1,0,0,0,A,-1.714329,-0.177267,1.004332,...,-0.699105,-0.07187,0.174701,0.120981,-0.541314,-0.526309,0.451516,-2.361624,-27.293017,-8e-06
2,1,CA,1,0,0,0,A,-1.714292,0.201824,1.031278,...,-1.706519,-0.082611,0.503346,-0.239464,-0.33575,-0.592218,0.329274,-2.361624,-27.293017,-8e-06
3,1,CA,1,0,0,0,A,-1.714254,0.214679,0.913735,...,-1.36914,-0.076217,0.514926,-0.16342,-0.579525,-0.697274,0.581739,-2.361624,-27.293017,-8e-06
4,1,CA,1,0,0,0,A,-1.714217,0.128852,0.875913,...,-1.057853,-0.101945,0.443105,-0.035338,-0.385345,-0.779848,0.489165,-2.361624,-27.293017,-8e-06


In [48]:
train_featured_std.head()

Unnamed: 0,crew,experiment,seat,has_r_peak,has_ecg_peak,has_gsr_peak,event,time,eeg_fp1_filtered,eeg_f7_filtered,...,fp2-f8-t4-t6-o2,fz-cz-pz,fp1-fp2,f7-f3-fz-f4-f8,t3-c3-cz-c4-t4,t5-p3-pz-p4-t6,o1-o2,ecg_moving_avg,r_moving_average,gsr_moving_average
0,1,CA,1,0,0,0,A,-1.714367,-0.263545,0.926641,...,-0.391034,0.01024,0.059664,0.210675,-0.356122,-0.442565,0.765588,-8e-06,-27.293017,-2.361624
1,1,CA,1,0,0,0,A,-1.714329,-0.177267,1.004332,...,-0.699105,-0.07187,0.174701,0.120981,-0.541314,-0.526309,0.451516,-8e-06,-27.293017,-2.361624
2,1,CA,1,0,0,0,A,-1.714292,0.201824,1.031278,...,-1.706519,-0.082611,0.503346,-0.239464,-0.33575,-0.592218,0.329274,-8e-06,-27.293017,-2.361624
3,1,CA,1,0,0,0,A,-1.714254,0.214679,0.913735,...,-1.36914,-0.076217,0.514926,-0.16342,-0.579525,-0.697274,0.581739,-8e-06,-27.293017,-2.361624
4,1,CA,1,0,0,0,A,-1.714217,0.128852,0.875913,...,-1.057853,-0.101945,0.443105,-0.035338,-0.385345,-0.779848,0.489165,-8e-06,-27.293017,-2.361624


In [50]:
pickle.dump(train_featured_std, open('train_featured_std', 'wb'))

## Creating same features for test data as well

In [289]:
x_test = pickle.load(open('test_data_filtered', 'rb'))

In [9]:
x_test = create_eeg_traces(x_test)

In [292]:
pickle.dump(x_test, open('x_test_eeg_filtered', 'wb'))

In [30]:
x_test.head()

Unnamed: 0,id,crew,experiment,time,seat,ecg_filtered,r_filtered,gsr_filtered,eeg_fp1_filtered,eeg_f7_filtered,...,fp1-f3-c3-p3-o1,fp1-f7-t3-t5-o1,fp2-f4-c4-p4-o2,fp2-f8-t4-t6-o2,fz-cz-pz,fp1-fp2,f7-f3-fz-f4-f8,t3-c3-cz-c4-t4,t5-p3-pz-p4-t6,o1-o2
0,0,1,LOFT,0.0,0,-7334.690761,676.12763,523.898371,17.681304,5.76717,...,-78.369364,222.831086,28.540899,79.03172,-32.82555,22.101758,-24.232714,-36.403993,-202.044249,17.571913
1,1,1,LOFT,0.0,1,-4217.578742,676.887086,519.813451,42.211107,79.165647,...,229.830352,206.665431,178.481527,11.299643,-283.08243,14.809012,-665.54249,-778.615836,-56.454485,-232.871309
2,2,1,LOFT,0.003906,0,-5706.293978,677.649327,515.685067,40.014061,57.393252,...,54.898485,161.375489,99.664141,34.147774,-163.470269,27.83832,-282.829898,-363.981261,-159.48306,-104.035037
3,3,1,LOFT,0.003906,1,-5425.335416,678.414169,511.519081,34.297921,57.672403,...,142.290111,149.473794,104.902751,19.533321,-203.881229,20.702299,-404.088293,-500.40867,-48.018351,-155.093911
4,4,1,LOFT,0.007812,0,-5032.03125,679.181428,507.321447,17.860571,46.167867,...,225.513163,154.737812,161.544612,50.077126,-220.498787,15.290169,-420.149458,-510.874515,62.958235,-178.886577


## Creating Moving Average features for 'ecg' , 'r' and 'gsr' in Test data

In [53]:
x_test = pickle.load(open('test_data/x_test_eeg_filtered', 'rb'))

In [68]:
x_test_ecg_avg = ecg_moving_average(x_test,'LOFT')
x_test_r_avg = r_moving_average(x_test_ecg_avg,'LOFT')
x_test_gsr_avg = gsr_moving_average(x_test_r_avg,'LOFT')

In [69]:
x_test_gsr_avg.shape

(17965143, 41)

In [74]:
data_ecg_peaks = signal_peak_ecg(x_test_gsr_avg,'LOFT')
data_r_peaks = signal_peak_r(data_ecg_peaks,'LOFT')
data_gsr_peaks = signal_peak_gsr(data_r_peaks,'LOFT')

In [75]:
data_gsr_peaks.head()

Unnamed: 0,id,crew,experiment,time,seat,ecg_filtered,r_filtered,gsr_filtered,eeg_fp1_filtered,eeg_f7_filtered,...,f7-f3-fz-f4-f8,t3-c3-cz-c4-t4,t5-p3-pz-p4-t6,o1-o2,ecg_moving_average,r_moving_average,gsr_moving_average,has_ecg_peak,has_r_peak,has_gsr_peak
2075596,2075596,2,LOFT,0.0,1,9770.951539,753.591852,584.063305,-97.374042,-98.547068,...,88.091728,75.340527,86.134417,-56.930177,0.0,0.0,0.0,0,0,0
2075597,2075597,2,LOFT,0.003,1,6468.173967,753.917072,591.87486,-60.200494,-54.266783,...,105.90446,93.594768,39.968909,-94.392095,0.0,0.0,0.0,0,0,0
2075599,2075599,2,LOFT,0.007,1,-2537.268863,754.566187,607.441645,10.302923,-38.387928,...,5.602227,28.300667,4.297441,-413.103043,0.0,0.0,0.0,0,0,0
2075601,2075601,2,LOFT,0.011,1,-6425.004844,755.212988,622.873602,50.427561,-9.724952,...,-25.970487,-32.410616,-37.602881,-546.617694,0.0,0.0,0.0,0,0,0
2075603,2075603,2,LOFT,0.014,1,-5674.561934,755.856818,638.101327,49.915598,-19.942795,...,-8.856992,66.986576,-28.115407,-539.876214,0.0,0.0,0.0,0,0,0


In [76]:
data_gsr_peaks = data_gsr_peaks.sort_values('id')

In [84]:
data_gsr_peaks.head()

Unnamed: 0,id,crew,experiment,time,seat,ecg_filtered,r_filtered,gsr_filtered,eeg_fp1_filtered,eeg_f7_filtered,...,f7-f3-fz-f4-f8,t3-c3-cz-c4-t4,t5-p3-pz-p4-t6,o1-o2,ecg_moving_average,r_moving_average,gsr_moving_average,has_ecg_peak,has_r_peak,has_gsr_peak
0,0,1,LOFT,0.0,0,-7334.690761,676.12763,523.898371,17.681304,5.76717,...,-24.232714,-36.403993,-202.044249,17.571913,0.0,0.0,0.0,0,0,0
1,1,1,LOFT,0.0,1,-4217.578742,676.887086,519.813451,42.211107,79.165647,...,-665.54249,-778.615836,-56.454485,-232.871309,0.0,0.0,0.0,0,0,0
2,2,1,LOFT,0.003906,0,-5706.293978,677.649327,515.685067,40.014061,57.393252,...,-282.829898,-363.981261,-159.48306,-104.035037,0.0,0.0,0.0,0,0,0
3,3,1,LOFT,0.003906,1,-5425.335416,678.414169,511.519081,34.297921,57.672403,...,-404.088293,-500.40867,-48.018351,-155.093911,0.0,0.0,0.0,0,0,0
4,4,1,LOFT,0.007812,0,-5032.03125,679.181428,507.321447,17.860571,46.167867,...,-420.149458,-510.874515,62.958235,-178.886577,0.0,0.0,0.0,1,0,0


In [94]:
data_gsr_peaks.head()

Unnamed: 0,id,crew,experiment,seat,has_r_peak,has_ecg_peak,has_gsr_peak,time,eeg_fp1_filtered,eeg_f7_filtered,...,fp2-f8-t4-t6-o2,fz-cz-pz,fp1-fp2,f7-f3-fz-f4-f8,t3-c3-cz-c4-t4,t5-p3-pz-p4-t6,o1-o2,ecg_moving_avg,r_moving_average,gsr_moving_average
0,0,1,LOFT,0,0,0,0,0.0,17.681304,5.76717,...,79.03172,-32.82555,22.101758,-24.232714,-36.403993,-202.044249,17.571913,0.0,0.0,0.0
1,1,1,LOFT,1,0,0,0,0.0,42.211107,79.165647,...,11.299643,-283.08243,14.809012,-665.54249,-778.615836,-56.454485,-232.871309,0.0,0.0,0.0
2,2,1,LOFT,0,0,0,0,0.003906,40.014061,57.393252,...,34.147774,-163.470269,27.83832,-282.829898,-363.981261,-159.48306,-104.035037,0.0,0.0,0.0
3,3,1,LOFT,1,0,0,0,0.003906,34.297921,57.672403,...,19.533321,-203.881229,20.702299,-404.088293,-500.40867,-48.018351,-155.093911,0.0,0.0,0.0
4,4,1,LOFT,0,0,1,0,0.007812,17.860571,46.167867,...,50.077126,-220.498787,15.290169,-420.149458,-510.874515,62.958235,-178.886577,0.0,0.0,0.0


In [95]:
train_featured_std.head()

Unnamed: 0,crew,experiment,seat,has_r_peak,has_ecg_peak,has_gsr_peak,time,eeg_fp1_filtered,eeg_f7_filtered,eeg_f8_filtered,...,fz-cz-pz,fp1-fp2,f7-f3-fz-f4-f8,t3-c3-cz-c4-t4,t5-p3-pz-p4-t6,o1-o2,ecg_moving_avg,r_moving_average,gsr_moving_average,event
0,1,CA,1,0,0,0,-1.714367,-0.263545,0.926641,-0.387026,...,0.01024,0.059664,0.210675,-0.356122,-0.442565,0.765588,-8e-06,-27.293017,-2.361624,A
1,1,CA,1,0,0,0,-1.714329,-0.177267,1.004332,-0.332052,...,-0.07187,0.174701,0.120981,-0.541314,-0.526309,0.451516,-8e-06,-27.293017,-2.361624,A
2,1,CA,1,0,0,0,-1.714292,0.201824,1.031278,0.431229,...,-0.082611,0.503346,-0.239464,-0.33575,-0.592218,0.329274,-8e-06,-27.293017,-2.361624,A
3,1,CA,1,0,0,0,-1.714254,0.214679,0.913735,0.127515,...,-0.076217,0.514926,-0.16342,-0.579525,-0.697274,0.581739,-8e-06,-27.293017,-2.361624,A
4,1,CA,1,0,0,0,-1.714217,0.128852,0.875913,-0.1098,...,-0.101945,0.443105,-0.035338,-0.385345,-0.779848,0.489165,-8e-06,-27.293017,-2.361624,A


In [97]:
train_featured_std.columns

Index(['crew', 'experiment', 'seat', 'has_r_peak', 'has_ecg_peak',
       'has_gsr_peak', 'time', 'eeg_fp1_filtered', 'eeg_f7_filtered',
       'eeg_f8_filtered', 'eeg_t4_filtered', 'eeg_t6_filtered',
       'eeg_t5_filtered', 'eeg_t3_filtered', 'eeg_fp2_filtered',
       'eeg_o1_filtered', 'eeg_p3_filtered', 'eeg_pz_filtered',
       'eeg_f3_filtered', 'eeg_fz_filtered', 'eeg_f4_filtered',
       'eeg_c4_filtered', 'eeg_p4_filtered', 'eeg_poz_filtered',
       'eeg_c3_filtered', 'eeg_cz_filtered', 'eeg_o2_filtered', 'ecg_filtered',
       'r_filtered', 'gsr_filtered', 'fp1-f3-c3-p3-o1', 'fp1-f7-t3-t5-o1',
       'fp2-f4-c4-p4-o2', 'fp2-f8-t4-t6-o2', 'fz-cz-pz', 'fp1-fp2',
       'f7-f3-fz-f4-f8', 't3-c3-cz-c4-t4', 't5-p3-pz-p4-t6', 'o1-o2',
       'ecg_moving_avg', 'r_moving_average', 'gsr_moving_average', 'event'],
      dtype='object')

In [98]:
data_gsr_peaks.columns

Index(['id', 'crew', 'experiment', 'seat', 'has_r_peak', 'has_ecg_peak',
       'has_gsr_peak', 'time', 'eeg_fp1_filtered', 'eeg_f7_filtered',
       'eeg_f8_filtered', 'eeg_t4_filtered', 'eeg_t6_filtered',
       'eeg_t5_filtered', 'eeg_t3_filtered', 'eeg_fp2_filtered',
       'eeg_o1_filtered', 'eeg_p3_filtered', 'eeg_pz_filtered',
       'eeg_f3_filtered', 'eeg_fz_filtered', 'eeg_f4_filtered',
       'eeg_c4_filtered', 'eeg_p4_filtered', 'eeg_poz_filtered',
       'eeg_c3_filtered', 'eeg_cz_filtered', 'eeg_o2_filtered', 'ecg_filtered',
       'r_filtered', 'gsr_filtered', 'fp1-f3-c3-p3-o1', 'fp1-f7-t3-t5-o1',
       'fp2-f4-c4-p4-o2', 'fp2-f8-t4-t6-o2', 'fz-cz-pz', 'fp1-fp2',
       'f7-f3-fz-f4-f8', 't3-c3-cz-c4-t4', 't5-p3-pz-p4-t6', 'o1-o2',
       'ecg_moving_avg', 'r_moving_average', 'gsr_moving_average'],
      dtype='object')

In [96]:
pickle.dump(data_gsr_peaks, open('test_data/test_featured_new', 'wb'))

## Standardising Test Data

In [2]:
x_test = pickle.load(open('test_data/test_featured_new', 'rb'))

In [4]:
test_temp = x_test.loc[:, ['id','crew','experiment','seat','has_r_peak', 'has_ecg_peak','has_gsr_peak']]
x_test.drop(['crew','experiment','seat','has_r_peak', 'has_ecg_peak','has_gsr_peak'],axis=1,inplace=True)

In [5]:
data_std = scaler.fit_transform(x_test)

data_featured_std = pd.DataFrame(data=data_std,columns=list(x_test.columns)) # Converting to dataframe

test_featured_std = pd.concat([test_temp,data_featured_std],axis=1)

In [14]:
test_featured_std.head()

Unnamed: 0,id,crew,experiment,seat,has_r_peak,has_ecg_peak,has_gsr_peak,time,eeg_fp1_filtered,eeg_f7_filtered,...,fp2-f8-t4-t6-o2,fz-cz-pz,fp1-fp2,f7-f3-fz-f4-f8,t3-c3-cz-c4-t4,t5-p3-pz-p4-t6,o1-o2,ecg_moving_avg,r_moving_average,gsr_moving_average
0,0,1,LOFT,0,0,0,0,-1.687829,0.089821,0.025127,...,0.21859,-0.150507,0.461914,-0.045529,-0.11221,-0.572348,0.058057,0.14063,-17.950641,-1.628757
1,1,1,LOFT,1,0,0,0,-1.687829,0.27745,0.616956,...,0.023209,-1.290133,0.308557,-1.633672,-2.515872,-0.154068,-0.761043,0.14063,-17.950641,-1.628757
2,2,1,LOFT,0,0,0,0,-1.687826,0.260644,0.4414,...,0.089117,-0.74544,0.582546,-0.685921,-1.173073,-0.45007,-0.339671,0.14063,-17.950641,-1.628757
3,3,1,LOFT,1,0,0,0,-1.687826,0.216922,0.443651,...,0.04696,-0.929464,0.432485,-0.986206,-1.614895,-0.129831,-0.506665,0.14063,-17.950641,-1.628757
4,4,1,LOFT,0,0,1,0,-1.687823,0.091193,0.350887,...,0.135067,-1.005138,0.318675,-1.02598,-1.648789,0.189005,-0.584481,0.14063,-17.950641,-1.628757


In [13]:
pickle.dump(test_featured_std, open('test_data/test_featured_std', 'wb'))

## Adding Some more features like heart rate, respiration rate and eeg frequency range features

### It has been found in various studies that there is high correlation between change in heart rate and respiration rate when cognitive state of a person changes. for e.g. when a person is more alert his/her heart rate increases same for respiration like when person is under stress then respiration rate increases. So making feature for the same in our data can help improve predictions.


### EEG frequnecy are divided in different range and have association with different coginitve states:
### Delta(1-4 Hz) : Its is used to measure depth of sleep.This is also associated with increased concentration on internal memory tasks.
### Theta(4 - 7 Hz) : When a person is engaged in some difficult task, theta waves become more prominent.
### Alpha(7-12 Hz): This range is associated with relaxed state of mind.
### Beta(12-30 Hz): This range is found when we plan or execute body movements or even see some moving objects.
### Gamma(above 30 Hz ): Some researchers relate gamma with attentive focusing and serves as carrier frequency to facilitate data exchange between brain regions. Others associate gamma with rapid eye movements

In [7]:
from biosppy.signals import ecg, resp , eeg
from scipy.interpolate import interp1d

In [34]:
train_data_ = pickle.load(open('train_featured_std', 'rb'))
test_data_ = pickle.load(open('test_data/test_featured_std', 'rb'))

In [6]:
data_featured = pickle.load(open('train_data/data_featured', 'rb'))

In [10]:
# https://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html
'''
From biosppy library we get heart rate, respiration rate , eeg frequencies at some fixed points
and to fill the gap and generate value for all the indices in our data set this method does a beautiful job.

'''
def interpolate_data(data,new_feature_timesteps,new_feature_vals):
    
    intp1d_ = interp1d(new_feature_timesteps, new_feature_vals, kind='cubic', fill_value="extrapolate")
    
    new_feature = intp1d_(data)
    
    return new_feature

In [9]:
def interpolate_respiration_rate(data,new_feature_timesteps,new_feature_vals):
    
    intp1d_ = interp1d(new_feature_timesteps, new_feature_vals, kind='linear', fill_value="extrapolate")
    
    new_feature = intp1d_(data)
    
    return new_feature

In [11]:
def ecg_heart_rate(dt,experiment):
    
    heart_rate = 'heart rate'
    
    features = list(dt.columns)
    
    features.append(heart_rate)
    
    data_crew = pd.DataFrame(columns=features)
    
    crew_id = dict(dt.crew.value_counts())
    
    
    for key, value in crew_id.items():

        data_crew_pilot1 = dt[(dt.crew == key) & (dt.seat == 1) & (dt.experiment == experiment)]
        
        ecg_hr = ecg.ecg(signal=data_crew_pilot1['ecg_filtered'], sampling_rate=256, show=False)
        
        hr = interpolate_data(data_crew_pilot1['ecg_filtered'], ecg_hr['heart_rate_ts'], ecg_hr['heart_rate'])
        
        data_crew_pilot1[heart_rate] = hr
        
        
        data_crew_pilot2 = dt[(dt.crew == key) & (dt.seat == 0) & (dt.experiment == experiment)]
        
        ecg_hr = ecg.ecg(signal=data_crew_pilot2['ecg_filtered'], sampling_rate=256, show=False)
        
        hr = interpolate_data(data_crew_pilot2['ecg_filtered'], ecg_hr['heart_rate_ts'], ecg_hr['heart_rate'])
        
        data_crew_pilot2[heart_rate] = hr
       
        
        data_crew = pd.concat([data_crew,data_crew_pilot1,data_crew_pilot2])
    
    
    return data_crew  

In [9]:
data_CA_hr = ecg_heart_rate(data_featured,'CA')
data_DA_hr = ecg_heart_rate(data_featured,'DA')
data_SS_hr = ecg_heart_rate(data_featured,'SS')

In [10]:
data_heart_rate = pd.concat([data_CA_hr,data_DA_hr,data_SS_hr])

In [11]:
data_heart_rate.head()

Unnamed: 0,crew,experiment,seat,has_r_peak,has_ecg_peak,has_gsr_peak,event,time,eeg_fp1_filtered,eeg_f7_filtered,...,fz-cz-pz,fp1-fp2,f7-f3-fz-f4-f8,t3-c3-cz-c4-t4,t5-p3-pz-p4-t6,o1-o2,gsr_moving_average,r_moving_average,ecg_moving_average,heart rate
2659039,6,CA,1,0,0,0,A,0.147822,0.391554,0.40677,...,0.516466,0.553153,0.453332,0.535611,0.505979,0.615784,0.206205,0.965081,0.502523,172.494868
2659041,6,CA,1,0,0,0,A,0.147844,0.393195,0.40563,...,0.516557,0.554611,0.453128,0.536672,0.505176,0.615827,0.206207,0.965077,0.502493,172.154635
2659043,6,CA,1,0,1,0,A,0.147866,0.392073,0.405845,...,0.516855,0.555783,0.454755,0.551524,0.508245,0.616181,0.206208,0.965072,0.502902,172.206897
2659045,6,CA,1,0,0,0,A,0.147887,0.390174,0.405307,...,0.517845,0.554026,0.454752,0.548477,0.511715,0.615631,0.20621,0.965067,0.503357,172.14594
2659047,6,CA,1,0,0,0,A,0.147909,0.391078,0.40415,...,0.517763,0.555244,0.455435,0.548621,0.512154,0.615866,0.206211,0.965062,0.503806,172.123309


In [12]:
def respiration_rate(dt,experiment):
    
    resp_rate = 'respiration rate'
    
    features = list(dt.columns)
    
    features.append(resp_rate)
    
    data_crew = pd.DataFrame(columns=features)
    
    crew_id = dict(dt.crew.value_counts())
    
    
    for key, value in crew_id.items():

        data_crew_pilot1 = dt[(dt.crew == key) & (dt.seat == 1) & (dt.experiment == experiment)]
        
        resp_r = resp.resp(signal=data_crew_pilot1['r_filtered'], sampling_rate=256, show=False)
        
        rr = interpolate_respiration_rate(data_crew_pilot1['r_filtered'], resp_r['resp_rate_ts'], resp_r['resp_rate'])
        
        data_crew_pilot1[resp_rate] = rr
        
        
        data_crew_pilot2 = dt[(dt.crew == key) & (dt.seat == 0) & (dt.experiment == experiment)]
        
        resp_r = resp.resp(signal=data_crew_pilot2['r_filtered'], sampling_rate=256, show=False)
        
        rr = interpolate_respiration_rate(data_crew_pilot2['r_filtered'], resp_r['resp_rate_ts'], resp_r['resp_rate'])
        
        data_crew_pilot2[resp_rate] = rr
       
        
        data_crew = pd.concat([data_crew,data_crew_pilot1,data_crew_pilot2])
    
    
    return data_crew  

In [13]:
data_CA_rr = respiration_rate(data_heart_rate,'CA')
data_DA_rr = respiration_rate(data_heart_rate,'DA')
data_SS_rr = respiration_rate(data_heart_rate,'SS')

In [14]:
data_resp_rate = pd.concat([data_CA_rr,data_DA_rr,data_SS_rr])

In [13]:
def eeg_freqbands(dt,experiment):
    
    eeg_theta = 'theta'
    eeg_alpha_low = 'alpha_low'
    eeg_alpha_high = 'alpha_high'
    eeg_beta = 'beta'
    eeg_gamma = 'gamma'
    
    eeg_features_ls = ['eeg_fp1_filtered', 'eeg_f7_filtered',
       'eeg_f8_filtered', 'eeg_t4_filtered', 'eeg_t6_filtered',
       'eeg_t5_filtered', 'eeg_t3_filtered', 'eeg_fp2_filtered',
       'eeg_o1_filtered', 'eeg_p3_filtered', 'eeg_pz_filtered',
       'eeg_f3_filtered', 'eeg_fz_filtered', 'eeg_f4_filtered',
       'eeg_c4_filtered', 'eeg_p4_filtered', 'eeg_poz_filtered',
       'eeg_c3_filtered', 'eeg_cz_filtered', 'eeg_o2_filtered','fp1-f3-c3-p3-o1', 'fp1-f7-t3-t5-o1',
       'fp2-f4-c4-p4-o2', 'fp2-f8-t4-t6-o2', 'fz-cz-pz', 'fp1-fp2',
       'f7-f3-fz-f4-f8', 't3-c3-cz-c4-t4', 't5-p3-pz-p4-t6', 'o1-o2']
    
    features = list(dt.columns)
    
    for eeg_ in eeg_features_ls:
        
        features.append(eeg_+eeg_theta)
        features.append(eeg_+eeg_alpha_low)
        features.append(eeg_+eeg_alpha_high)
        features.append(eeg_+eeg_beta)
        features.append(eeg_+eeg_gamma)
    
    data_crew = pd.DataFrame(columns=features)
    
    crew_id = dict(dt.crew.value_counts())
    

    
    for key, value in crew_id.items():

        data_crew_pilot1 = dt[(dt.crew == key) & (dt.seat == 1) & (dt.experiment == experiment)]
        
        data_eeg_feat1 = data_crew_pilot1.loc[:,eeg_features_ls]
        
        eeg_pf = eeg.get_power_features(signal= data_eeg_feat1, sampling_rate=256)
        
        theta = eeg_pf['theta']
        alpha_low = eeg_pf['alpha_low']
        alpha_high = eeg_pf['alpha_high']
        beta = eeg_pf['beta']
        gamma = eeg_pf['gamma']
        
        for i,eeg_f in enumerate(eeg_features_ls):

            eeg_theta_ = interpolate_data(data_eeg_feat1[eeg_f], eeg_pf['ts'], theta[:,i])
            eeg_alpha_low_ = interpolate_data(data_eeg_feat1[eeg_f], eeg_pf['ts'], alpha_low[:,i])
            eeg_alpha_high_ = interpolate_data(data_eeg_feat1[eeg_f], eeg_pf['ts'], alpha_high[:,i])
            eeg_beta_ = interpolate_data(data_eeg_feat1[eeg_f], eeg_pf['ts'], beta[:,i])
            eeg_gamma_ = interpolate_data(data_eeg_feat1[eeg_f], eeg_pf['ts'], gamma[:,i])

            
            data_crew_pilot1[eeg_f+eeg_theta] = eeg_theta_
            data_crew_pilot1[eeg_f+eeg_alpha_low] = eeg_alpha_low_
            data_crew_pilot1[eeg_f+eeg_alpha_high] = eeg_alpha_high_
            data_crew_pilot1[eeg_f+eeg_beta] = eeg_beta_
            data_crew_pilot1[eeg_f+eeg_gamma] = eeg_gamma_
        
        #####################################################################################
        
        data_crew_pilot2 = dt[(dt.crew == key) & (dt.seat == 0) & (dt.experiment == experiment)]
        
        data_eeg_feat2 = data_crew_pilot2.loc[:,eeg_features_ls]
        
        eeg_pf = eeg.get_power_features(signal= data_eeg_feat2, sampling_rate=256)
        
        theta = eeg_pf['theta']
        alpha_low = eeg_pf['alpha_low']
        alpha_high = eeg_pf['alpha_high']
        beta = eeg_pf['beta']
        gamma = eeg_pf['gamma']
                       
        for i,eeg_f in enumerate(eeg_features_ls):

            eeg_theta_ = interpolate_data(data_eeg_feat2[eeg_f], eeg_pf['ts'], theta[:,i])
            eeg_alpha_low_ = interpolate_data(data_eeg_feat2[eeg_f], eeg_pf['ts'], alpha_low[:,i])
            eeg_alpha_high_ = interpolate_data(data_eeg_feat2[eeg_f], eeg_pf['ts'], alpha_high[:,i])
            eeg_beta_ = interpolate_data(data_eeg_feat2[eeg_f], eeg_pf['ts'], beta[:,i])
            eeg_gamma_ = interpolate_data(data_eeg_feat2[eeg_f], eeg_pf['ts'], gamma[:,i])

            
            data_crew_pilot2[eeg_f+eeg_theta] = eeg_theta_
            data_crew_pilot2[eeg_f+eeg_alpha_low] = eeg_alpha_low_
            data_crew_pilot2[eeg_f+eeg_alpha_high] = eeg_alpha_high_
            data_crew_pilot2[eeg_f+eeg_beta] = eeg_beta_
            data_crew_pilot2[eeg_f+eeg_gamma] = eeg_gamma_
       
        
        data_crew = pd.concat([data_crew,data_crew_pilot1,data_crew_pilot2])
    
    
    return data_crew  

In [17]:
data_CA_eeg_pf = eeg_freqbands(data_resp_rate,'CA')
data_DA_eeg_pf = eeg_freqbands(data_resp_rate,'DA')
data_SS_eeg_pf = eeg_freqbands(data_resp_rate,'SS')

In [18]:
data_eeg_pf = pd.concat([data_CA_eeg_pf,data_DA_eeg_pf,data_SS_eeg_pf])

In [19]:
data_eeg_pf.drop(['crew', 'experiment', 'seat', 'has_r_peak', 'has_ecg_peak',
       'has_gsr_peak', 'event', 'time', 'eeg_fp1_filtered', 'eeg_f7_filtered',
       'eeg_f8_filtered', 'eeg_t4_filtered', 'eeg_t6_filtered',
       'eeg_t5_filtered', 'eeg_t3_filtered', 'eeg_fp2_filtered',
       'eeg_o1_filtered', 'eeg_p3_filtered', 'eeg_pz_filtered',
       'eeg_f3_filtered', 'eeg_fz_filtered', 'eeg_f4_filtered',
       'eeg_c4_filtered', 'eeg_p4_filtered', 'eeg_poz_filtered',
       'eeg_c3_filtered', 'eeg_cz_filtered', 'eeg_o2_filtered', 'ecg_filtered',
       'r_filtered', 'gsr_filtered', 'fp1-f3-c3-p3-o1', 'fp1-f7-t3-t5-o1',
       'fp2-f4-c4-p4-o2', 'fp2-f8-t4-t6-o2', 'fz-cz-pz', 'fp1-fp2',
       'f7-f3-fz-f4-f8', 't3-c3-cz-c4-t4', 't5-p3-pz-p4-t6', 'o1-o2',
       'gsr_moving_average', 'r_moving_average', 'ecg_moving_average'],axis=1,inplace=True)

In [20]:
data_eeg_pf.head()

Unnamed: 0,heart rate,respiration rate,eeg_fp1_filteredtheta,eeg_fp1_filteredalpha_low,eeg_fp1_filteredalpha_high,eeg_fp1_filteredbeta,eeg_fp1_filteredgamma,eeg_f7_filteredtheta,eeg_f7_filteredalpha_low,eeg_f7_filteredalpha_high,...,t5-p3-pz-p4-t6theta,t5-p3-pz-p4-t6alpha_low,t5-p3-pz-p4-t6alpha_high,t5-p3-pz-p4-t6beta,t5-p3-pz-p4-t6gamma,o1-o2theta,o1-o2alpha_low,o1-o2alpha_high,o1-o2beta,o1-o2gamma
2659039,172.494868,-0.035358,4.4e-05,2.508413e-07,1.279159e-07,9.666834e-09,5.804314e-10,4.7e-05,2.65505e-07,1.350792e-07,...,7.2e-05,3.766967e-07,1.935818e-07,1.647006e-08,5.742071e-10,0.000107,5.673901e-07,2.92105e-07,2.024921e-08,4.981823e-10
2659041,172.154635,-0.035358,4.4e-05,2.510316e-07,1.280143e-07,9.676839e-09,5.807131e-10,4.7e-05,2.654792e-07,1.350562e-07,...,7.2e-05,3.767612e-07,1.935848e-07,1.647113e-08,5.757692e-10,0.000107,5.673902e-07,2.921053e-07,2.024916e-08,4.981853e-10
2659043,172.206897,-0.035359,4.4e-05,2.50901e-07,1.279466e-07,9.670036e-09,5.805203e-10,4.7e-05,2.65484e-07,1.350604e-07,...,7.2e-05,3.765089e-07,1.935679e-07,1.646709e-08,5.700174e-10,0.000107,5.673911e-07,2.921075e-07,2.024871e-08,4.982101e-10
2659045,172.14594,-0.03536,4.4e-05,2.506847e-07,1.278364e-07,9.658142e-09,5.801958e-10,4.7e-05,2.65472e-07,1.350499e-07,...,7.2e-05,3.762038e-07,1.935311e-07,1.646266e-08,5.641967e-10,0.000107,5.673897e-07,2.921041e-07,2.024941e-08,4.981716e-10
2659047,172.123309,-0.035361,4.4e-05,2.507869e-07,1.278881e-07,9.663862e-09,5.803499e-10,4.7e-05,2.654466e-07,1.350282e-07,...,7.2e-05,3.761637e-07,1.935251e-07,1.646211e-08,5.635093e-10,0.000107,5.673903e-07,2.921055e-07,2.024911e-08,4.98188e-10


In [21]:
data_eeg_pf = data_eeg_pf.sort_index()

In [24]:
data_std = scaler.fit_transform(data_eeg_pf)

new_features_std = pd.DataFrame(data=data_std,columns=list(data_eeg_pf.columns)) # Converting to dataframe

In [28]:
train_data = pd.concat([train_data_,new_features_std],axis=1)

In [29]:
train_data.head()

Unnamed: 0,crew,experiment,seat,has_r_peak,has_ecg_peak,has_gsr_peak,event,time,eeg_fp1_filtered,eeg_f7_filtered,...,t5-p3-pz-p4-t6theta,t5-p3-pz-p4-t6alpha_low,t5-p3-pz-p4-t6alpha_high,t5-p3-pz-p4-t6beta,t5-p3-pz-p4-t6gamma,o1-o2theta,o1-o2alpha_low,o1-o2alpha_high,o1-o2beta,o1-o2gamma
0,1,CA,1,0,0,0,A,-1.714367,-0.263545,0.926641,...,-0.273852,-0.354617,-0.192741,-0.369647,-0.333646,0.528167,-1.048651,-0.590283,-0.240193,-0.020438
1,1,CA,1,0,0,0,A,-1.714329,-0.177267,1.004332,...,-0.270346,-0.306602,-0.14931,-0.375325,-0.327312,0.528217,-1.057778,-0.597163,-0.240114,-0.019782
2,1,CA,1,0,0,0,A,-1.714292,0.201824,1.031278,...,-0.261147,-0.212327,-0.062987,-0.382288,-0.31545,0.528216,-1.057227,-0.596747,-0.240121,-0.019822
3,1,CA,1,0,0,0,A,-1.714254,0.214679,0.913735,...,-0.260542,-0.206861,-0.05795,-0.382563,-0.314781,0.528218,-1.059037,-0.598115,-0.240097,-0.019691
4,1,CA,1,0,0,0,A,-1.714217,0.128852,0.875913,...,-0.269551,-0.297204,-0.14076,-0.376241,-0.326098,0.528136,-1.073101,-0.608798,-0.239813,-0.018671


In [30]:
pickle.dump(train_data, open('train_data/train_data_new', 'wb'))

## Adding above features on test data as well

In [3]:
test_data = pickle.load(open('test_data/test_featured_new', 'rb'))

In [4]:
test_data.head()

Unnamed: 0,id,crew,experiment,seat,has_r_peak,has_ecg_peak,has_gsr_peak,time,eeg_fp1_filtered,eeg_f7_filtered,...,fp2-f8-t4-t6-o2,fz-cz-pz,fp1-fp2,f7-f3-fz-f4-f8,t3-c3-cz-c4-t4,t5-p3-pz-p4-t6,o1-o2,ecg_moving_avg,r_moving_average,gsr_moving_average
0,0,1,LOFT,0,0,0,0,0.0,17.681304,5.76717,...,79.03172,-32.82555,22.101758,-24.232714,-36.403993,-202.044249,17.571913,0.0,0.0,0.0
1,1,1,LOFT,1,0,0,0,0.0,42.211107,79.165647,...,11.299643,-283.08243,14.809012,-665.54249,-778.615836,-56.454485,-232.871309,0.0,0.0,0.0
2,2,1,LOFT,0,0,0,0,0.003906,40.014061,57.393252,...,34.147774,-163.470269,27.83832,-282.829898,-363.981261,-159.48306,-104.035037,0.0,0.0,0.0
3,3,1,LOFT,1,0,0,0,0.003906,34.297921,57.672403,...,19.533321,-203.881229,20.702299,-404.088293,-500.40867,-48.018351,-155.093911,0.0,0.0,0.0
4,4,1,LOFT,0,0,1,0,0.007812,17.860571,46.167867,...,50.077126,-220.498787,15.290169,-420.149458,-510.874515,62.958235,-178.886577,0.0,0.0,0.0


In [32]:
sub_test_data = test_data.iloc[0:9001,:]

In [33]:
sub_test_data.head()

Unnamed: 0,id,crew,experiment,seat,has_r_peak,has_ecg_peak,has_gsr_peak,time,eeg_fp1_filtered,eeg_f7_filtered,...,fp2-f8-t4-t6-o2,fz-cz-pz,fp1-fp2,f7-f3-fz-f4-f8,t3-c3-cz-c4-t4,t5-p3-pz-p4-t6,o1-o2,ecg_moving_avg,r_moving_average,gsr_moving_average
0,0,1,LOFT,0,0,0,0,0.0,17.681304,5.76717,...,79.03172,-32.82555,22.101758,-24.232714,-36.403993,-202.044249,17.571913,0.0,0.0,0.0
1,1,1,LOFT,1,0,0,0,0.0,42.211107,79.165647,...,11.299643,-283.08243,14.809012,-665.54249,-778.615836,-56.454485,-232.871309,0.0,0.0,0.0
2,2,1,LOFT,0,0,0,0,0.003906,40.014061,57.393252,...,34.147774,-163.470269,27.83832,-282.829898,-363.981261,-159.48306,-104.035037,0.0,0.0,0.0
3,3,1,LOFT,1,0,0,0,0.003906,34.297921,57.672403,...,19.533321,-203.881229,20.702299,-404.088293,-500.40867,-48.018351,-155.093911,0.0,0.0,0.0
4,4,1,LOFT,0,0,1,0,0.007812,17.860571,46.167867,...,50.077126,-220.498787,15.290169,-420.149458,-510.874515,62.958235,-178.886577,0.0,0.0,0.0


In [36]:
test_data_hr = ecg_heart_rate(sub_test_data,'LOFT')
test_data_rr = respiration_rate(sub_test_data,'LOFT')

In [35]:
test_data_eeg_pf = eeg_freqbands(sub_test_data,'LOFT')

In [13]:
test_data_hr = ecg_heart_rate(test_data,'LOFT')
test_data_rr = respiration_rate(test_data_hr,'LOFT')

In [14]:
test_data_eeg_pf = eeg_freqbands(test_data_rr,'LOFT')

In [15]:
pickle.dump(test_data_eeg_pf, open('test_data/test_data_newfeatures', 'wb'))

In [32]:
test_data_eeg_pf = pickle.load(open('test_data/test_featured_new', 'rb'))

In [18]:
test_data_eeg_pf.drop(['id', 'crew', 'experiment', 'seat', 'has_r_peak', 'has_ecg_peak',
       'has_gsr_peak', 'time', 'eeg_fp1_filtered', 'eeg_f7_filtered',
       'eeg_f8_filtered', 'eeg_t4_filtered', 'eeg_t6_filtered',
       'eeg_t5_filtered', 'eeg_t3_filtered', 'eeg_fp2_filtered',
       'eeg_o1_filtered', 'eeg_p3_filtered', 'eeg_pz_filtered',
       'eeg_f3_filtered', 'eeg_fz_filtered', 'eeg_f4_filtered',
       'eeg_c4_filtered', 'eeg_p4_filtered', 'eeg_poz_filtered',
       'eeg_c3_filtered', 'eeg_cz_filtered', 'eeg_o2_filtered', 'ecg_filtered',
       'r_filtered', 'gsr_filtered', 'fp1-f3-c3-p3-o1', 'fp1-f7-t3-t5-o1',
       'fp2-f4-c4-p4-o2', 'fp2-f8-t4-t6-o2', 'fz-cz-pz', 'fp1-fp2',
       'f7-f3-fz-f4-f8', 't3-c3-cz-c4-t4', 't5-p3-pz-p4-t6', 'o1-o2',
       'ecg_moving_avg', 'r_moving_average', 'gsr_moving_average'],axis=1,inplace=True)

In [38]:
data_std = scaler.fit_transform(test_data_eeg_pf)

new_features_std = pd.DataFrame(data=data_std,columns=list(test_data_eeg_pf.columns)) # Converting to dataframe

In [20]:
test_data_new = pd.concat([test_data,new_features_std],axis=1)

In [21]:
pickle.dump(test_data_new, open('test_data/test_data_newfeatures_std', 'wb'))

In [2]:
test_data = pickle.load(open('test_data/test_data_newfeatures_std', 'rb'))

In [3]:
test_data.shape

(17965143, 196)

In [4]:
test_data.columns

Index(['id', 'crew', 'experiment', 'seat', 'has_r_peak', 'has_ecg_peak',
       'has_gsr_peak', 'time', 'eeg_fp1_filtered', 'eeg_f7_filtered',
       ...
       't5-p3-pz-p4-t6theta', 't5-p3-pz-p4-t6alpha_low',
       't5-p3-pz-p4-t6alpha_high', 't5-p3-pz-p4-t6beta', 't5-p3-pz-p4-t6gamma',
       'o1-o2theta', 'o1-o2alpha_low', 'o1-o2alpha_high', 'o1-o2beta',
       'o1-o2gamma'],
      dtype='object', length=196)

In [4]:
def testfunc():
    print("feature file func")

In [5]:
testfunc()

feature file func
