In [23]:
import pickle

import numpy as np
import pandas as pd
from ipynb.fs.defs import RCAF_Featurization2 , RCAF_EDA

In [13]:
# Read the raw test set from the working directory.
test_raw_data = pd.read_csv(filepath_or_buffer='test.csv')

In [None]:
# Keep a 500-row window (positions 2000-2499) of the raw test data.
# Some of the interpolation methods (e.g. for respiration) require a certain
# amount of data to work.
sub_test_data = test_raw_data.iloc[2000:2500]

In [57]:
# Persist the slice to disk; a later cell reloads it from this same path.
# The `with` block guarantees the file is flushed and closed even if
# pickling raises.
with open('test_data/sub_test_data', 'wb') as sub_test_file:
    pickle.dump(sub_test_data, sub_test_file)

In [7]:
# Reload the pickled test slice. The `with` block closes the file handle
# as soon as loading finishes.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open('test_data/sub_test_data', 'rb') as sub_test_file:
    sub_test_data = pickle.load(sub_test_file)

In [3]:
# Load the trained model used by pipeline(). The `with` block closes the
# file handle as soon as loading finishes.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open('best_model/lgbm_40est', 'rb') as best_model_file:
    best_model = pickle.load(best_model_file)

In [17]:
# Low-pass filter settings read by filter_data(), one pair per signal type.
# `*_lowpass_freq` is handed to the RCAF_EDA filter helpers as the cutoff
# frequency and `*_n` as the following argument.
# NOTE(review): `*_n` is presumably the Butterworth filter order - confirm
# against RCAF_EDA.
ecg_lowpass_freq, ecg_n = 100, 4
r_lowpass_freq, r_n = 1, 5
gsr_lowpass_freq, gsr_n = 2, 3
eeg_lowpass_freq, eeg_n = 100, 4

In [37]:
def filter_data(data):
    """
    Add a low-pass filtered copy of every physiological signal to ``data``.

    For 'ecg', 'r', 'gsr' and each of the 20 EEG channels a new column named
    '<signal>_filtered' is written. The frame is modified in place and the
    same object is returned.

    Per the original author's note, the RCAF_EDA helpers are built on
    scipy.signal.butter(), and the cutoff frequencies differ per signal type
    following recommendations from research papers. The cutoffs and the
    accompanying ``*_n`` values come from the module-level settings.
    """
    body_signal_settings = (
        ('ecg', ecg_lowpass_freq, ecg_n),
        ('r', r_lowpass_freq, r_n),
        ('gsr', gsr_lowpass_freq, gsr_n),
    )

    # Filter all three body signals first, then attach the new columns, so
    # every filter call sees the frame exactly as it was passed in.
    filtered_body_signals = [
        (signal, RCAF_EDA.filter_lowpass(data[signal], cutoff, n))
        for signal, cutoff, n in body_signal_settings
    ]
    for signal, filtered_values in filtered_body_signals:
        data[signal + '_filtered'] = filtered_values

    eeg_channels = (
        'eeg_fp1', 'eeg_f7', 'eeg_f8', 'eeg_t4', 'eeg_t6',
        'eeg_t5', 'eeg_t3', 'eeg_fp2', 'eeg_o1', 'eeg_p3',
        'eeg_pz', 'eeg_f3', 'eeg_fz', 'eeg_f4', 'eeg_c4',
        'eeg_p4', 'eeg_poz', 'eeg_c3', 'eeg_cz', 'eeg_o2',
    )

    # EEG channels go through a different helper that receives the whole
    # frame plus the column name.
    for channel in eeg_channels:
        data[channel + '_filtered'] = RCAF_EDA.filter_bycolumnindex(
            data, channel, eeg_lowpass_freq, eeg_n
        )

    return data

In [38]:
def heartRate_respRate_eegFreqBands(data):
    """
    Add heart-rate, respiration-rate and EEG frequency-band features.

    The three RCAF_Featurization steps run in sequence, each receiving the
    previous step's result together with the literal 'LOFT'. Per the
    original author's note the EEG bands are delta, theta, alpha, beta and
    gamma.

    NOTE(review): the visible import cell brings in RCAF_Featurization2, not
    RCAF_Featurization - confirm this name is defined before running.
    """
    enriched = RCAF_Featurization.ecg_heart_rate(data, 'LOFT')
    enriched = RCAF_Featurization.respiration_rate(enriched, 'LOFT')
    enriched = RCAF_Featurization.eeg_freqbands(enriched, 'LOFT')
    return enriched

In [39]:
def eegTraces_and_moving_averages(data):
    """
    Add EEG trace features followed by ECG, respiration and GSR moving averages.

    Per the original author's note, the EEG traces combine electrode points
    top-to-bottom on the left and right side of the skull and left-to-right,
    and each moving average covers the last 10 indices.

    NOTE(review): the visible import cell brings in RCAF_Featurization2, not
    RCAF_Featurization - confirm this name is defined before running.
    """
    frame = RCAF_Featurization.create_eeg_traces(data)

    # Each moving-average step takes the previous result plus the literal 'LOFT'.
    for signal in ('ecg', 'r', 'gsr'):
        add_moving_average = getattr(RCAF_Featurization, signal + '_moving_average')
        frame = add_moving_average(frame, 'LOFT')

    return frame

In [40]:
def peak_features(data):
    """
    Add ECG, respiration and GSR peak features and return the rows sorted by index.

    Per the original author's note, the RCAF_Featurization helpers take the
    peak indices returned by scipy.signal.find_peaks() and label rows that
    contain a peak as 1 and all others as 0.

    NOTE(review): the visible import cell brings in RCAF_Featurization2, not
    RCAF_Featurization - confirm this name is defined before running.
    """
    with_peaks = RCAF_Featurization.signal_peak_ecg(data, 'LOFT')
    with_peaks = RCAF_Featurization.signal_peak_r(with_peaks, 'LOFT')
    with_peaks = RCAF_Featurization.signal_peak_gsr(with_peaks, 'LOFT')

    return with_peaks.sort_index()

In [2]:
def create_features(data):
    """
    Run every feature-creation stage on already-filtered signal data.

    Stages are applied in this order, each consuming the previous result:
    eegTraces_and_moving_averages, peak_features,
    heartRate_respRate_eegFreqBands.
    """
    return heartRate_respRate_eegFreqBands(
        peak_features(
            eegTraces_and_moving_averages(data)
        )
    )

In [3]:
def decode_pred_Vals(arr):
    """
    Convert encoded class predictions back to their original letter labels.

    Mapping: 0 -> 'A', 1 -> 'B', 2 -> 'C', 3 -> 'D'.

    Parameters
    ----------
    arr : iterable
        Encoded predictions. Elements are compared with ``==``, so integer
        and float encodings (e.g. ``2`` and ``2.0``) are both accepted.

    Returns
    -------
    list of str
        Exactly one label per element of ``arr``, in the same order.

    Raises
    ------
    ValueError
        If an element matches none of the four codes. Such elements used to
        be skipped silently, which made the result shorter than the input
        and shifted every following label onto the wrong row.
    """
    labels = ('A', 'B', 'C', 'D')
    dec = []

    for position, val in enumerate(arr):
        # Try the codes in ascending order and stop at the first match.
        label = next(
            (labels[code] for code in range(len(labels)) if val == code),
            None,
        )
        if label is None:
            raise ValueError(
                'Cannot decode prediction {!r} at position {}: '
                'expected one of 0, 1, 2, 3'.format(val, position)
            )
        dec.append(label)

    return dec

In [1]:
def pipeline(query_data, scaler=None):
    """
    Turn raw data into predicted labels.

    Runs preprocessing (filter_data), feature creation (create_features),
    standardization and prediction with ``best_model``, then decodes the
    predictions with decode_pred_Vals.

    Parameters
    ----------
    query_data : DataFrame or anything accepted by ``pd.DataFrame``
        Raw rows. Must contain the 20 raw EEG columns, 'ecg', 'r', 'gsr',
        'id', 'crew' and 'experiment', which are dropped along the way.
        The input is copied first, so the caller's object is never modified.
    scaler : object with a ``transform`` method, optional
        An already-fitted scaler to apply to the continuous features. When
        omitted, a fresh ``StandardScaler`` is fitted on the query rows
        themselves (the original behaviour).
        NOTE(review): the model was presumably trained on features scaled
        with training-set statistics - confirm, and pass that scaler here.

    Returns
    -------
    list of str
        Decoded labels for the values returned by ``best_model.predict``.

    Notes
    -----
    ``StandardScaler`` is not imported in the visible import cell; it must
    be in scope whenever ``scaler`` is None.
    """
    raw_signal_columns = [
        'eeg_fp1', 'eeg_f7', 'eeg_f8', 'eeg_t4', 'eeg_t6',
        'eeg_t5', 'eeg_t3', 'eeg_fp2', 'eeg_o1', 'eeg_p3',
        'eeg_pz', 'eeg_f3', 'eeg_fz', 'eeg_f4', 'eeg_c4',
        'eeg_p4', 'eeg_poz', 'eeg_c3', 'eeg_cz', 'eeg_o2',
        'ecg', 'r', 'gsr',
    ]
    # Columns handed to the model as integers, without standardization.
    unscaled_columns = ['seat', 'has_r_peak', 'has_ecg_peak', 'has_gsr_peak']

    ############# Test Data preprocessing #################################################
    # filter_data adds columns in place; copy so the caller's frame stays
    # untouched and the pipeline can be re-run on the same input.
    query_data = pd.DataFrame(query_data).copy()

    filtered_data = filter_data(query_data)

    # Only the filtered versions of the signals are kept from here on.
    filtered_data = filtered_data.drop(columns=raw_signal_columns)

    ############# Featurization ###########################################################
    data_with_crafted_features = (
        create_features(filtered_data)
        .drop(columns=['id', 'crew', 'experiment'])
        .sort_index()
    )

    data_temp = data_with_crafted_features.loc[:, unscaled_columns]
    data_to_scale = data_with_crafted_features.drop(columns=unscaled_columns)

    ############# Normalizing data ###################################################################
    if scaler is None:
        data_std = StandardScaler().fit_transform(data_to_scale)
    else:
        data_std = scaler.transform(data_to_scale)

    # Carry the original index over: the scaler returns a bare array, and
    # concat(axis=1) aligns on index labels. With a default 0..n-1 index a
    # slice such as rows 2000-2499 would not line up with data_temp.
    data_std_ = pd.DataFrame(
        data=data_std,
        columns=list(data_to_scale.columns),
        index=data_to_scale.index,
    )

    data_model_ready = pd.concat([data_temp, data_std_], axis=1)
    data_model_ready = data_model_ready.astype(
        {column: 'int' for column in unscaled_columns}
    )

    ############# Prediction using best model ########################################################
    pred_val = best_model.predict(data_model_ready)

    ############# Labels decoding ###################################################################
    predictions = decode_pred_Vals(pred_val)

    return predictions

In [80]:
# Run the full pipeline on the reloaded test slice.
predicted_value = pipeline(query_data=sub_test_data)

In [82]:
# Spot-check a few predictions by position.
# NOTE(review): positions 508 and 715 lie beyond the 500-row slice taken
# earlier in this notebook - confirm the pickled data really has that many rows.
for sample_position in (200, 508, 715):
    print(predicted_value[sample_position])

A
D
C
