In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob
from sklearn.preprocessing import StandardScaler
from scipy.signal import butter, lfilter, cheby2
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet, Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score

In [2]:
# function for bandpass filters

def frq_feature(X, cutoffs,fs):
    F=[]
    raw =[]
    for j in range(len(cutoffs)-1):
        b,a = cheby2(3,10,(cutoffs[j]/(fs/2),cutoffs[j+1]/(fs/2)),btype='bandpass')
        raw.append([lfilter(b,a,X[:,x]) for x in range(32)])
        F = np.concatenate(raw, axis =0)
        F = np.transpose(F)
        scale = StandardScaler()
        F = scale.fit_transform(F)
    return F

In [3]:
# funtion for loading training dataset
def load_train_data(subj):
    filenames =  glob('C:/Users/mysel/Dropbox/datascience bootcamp/EEG/train/subj%d_series*_data.csv' % (subj) )
    y_raw= []
    raw = []
    for filename in filenames:
        data = pd.read_csv(filename, index_col=0)
        labels= pd.read_csv(filename.replace('_data','_events'), index_col=0)
        raw.append(data)
        y_raw.append(labels)

    X = pd.concat(raw)
    y = pd.concat(y_raw)

    X = np.asarray(X.astype(float))
    y = np.asarray(y.astype(float))
    return X,y



In [4]:
# funtion for loading testing dataset
def load_test_data(subj):
    filenames =  glob('C:/Users/mysel/Dropbox/datascience bootcamp/EEG/test/subj%d_series*_data.csv' % (subj) )
    raw = []
    idx_raw =[]
    for filename in filenames:
        data = pd.read_csv(filename)
        raw.append(data)
        idx_raw.append(np.array(data['id']))
    X = pd.concat(raw) 
    X = X.drop(['id' ], axis=1)#remove id
    X = np.asarray(X).astype(float)

    idx=np.concatenate(idx_raw)

    return X, idx

In [5]:
# parameters
fs=500 #sampling rate
cutoffs = [0.2,4,8,13]  #filter passband
ds =40 #downsampling rate

# parameter grids for Elastic Net
alphas = np.logspace(-3, 3, 7)
ratios = [0.1,0.3,0.5,0.7,0.9]
val_flag = True

In [6]:
# initialization
cols = ['HandStart','FirstDigitTouch',
        'BothStartLoadPhase','LiftOff',
        'Replace','BothReleased']

subjects = range(1,13)
prediction_final = []
idx_final = []
auc_score = np.empty((len(subjects),len(cols)))  

In [10]:
# main loop
for subject in subjects:
    # load training data
    X, y = load_train_data(subject)
    
    # Test data
    if val_flag:
        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state =0, shuffle = False, test_size =0.2)
    else:
        X_test, test_id = load_test_data(subject)
        X_train = X
        y_train = y
               
    # band-pass filters
    F = frq_feature(X_train,cutoffs,fs)
       
    prediction = np.empty((X_test.shape[0],len(cols)))    
    # looping through events
    for i in range(len(cols)):
        print('Training subject %d, feature: %s' % (subject, i))
         # fit training data with parameter searching
        param_grid = {'alpha':  alphas, 'l1_ratio': ratios}
        grid = GridSearchCV(ElasticNet(), 
                    param_grid = param_grid, 
                    return_train_score=True, 
                    scoring='roc_auc')
        grid.fit(F[::ds,:], y_train[::ds,i])
        prediction[:,i] = grid.predict(frq_feature(X_test,cutoffs,fs))
        if val_flag:
            auc_score[subject-1, i] = roc_auc_score(y_test[:,i],prediction[:,i])
            print("AUC_ROC = {0:.3f}".format(auc_score[subject-1, i]))
            print("Parameters: {0}".format(grid.cv_results_['params'][0]))
            
    prediction_final.append(prediction)
    if ~np.array(val_flag):
        idx_final.append(test_id)

Training subject 1, feature: 0
AUC_ROC = 0.894
Parameters: {'alpha': 0.001, 'l1_ratio': 0.1}
Training subject 1, feature: 1
AUC_ROC = 0.871
Parameters: {'alpha': 0.001, 'l1_ratio': 0.1}
Training subject 1, feature: 2
AUC_ROC = 0.878
Parameters: {'alpha': 0.001, 'l1_ratio': 0.1}
Training subject 1, feature: 3
AUC_ROC = 0.897
Parameters: {'alpha': 0.001, 'l1_ratio': 0.1}
Training subject 1, feature: 4
AUC_ROC = 0.876
Parameters: {'alpha': 0.001, 'l1_ratio': 0.1}
Training subject 1, feature: 5
AUC_ROC = 0.866
Parameters: {'alpha': 0.001, 'l1_ratio': 0.1}
Training subject 2, feature: 0
AUC_ROC = 0.900
Parameters: {'alpha': 0.001, 'l1_ratio': 0.1}
Training subject 2, feature: 1
AUC_ROC = 0.803
Parameters: {'alpha': 0.001, 'l1_ratio': 0.1}
Training subject 2, feature: 2
AUC_ROC = 0.898
Parameters: {'alpha': 0.001, 'l1_ratio': 0.1}
Training subject 2, feature: 3
AUC_ROC = 0.825
Parameters: {'alpha': 0.001, 'l1_ratio': 0.1}
Training subject 2, feature: 4
AUC_ROC = 0.774
Parameters: {'alpha': 0

In [None]:
# generate submission file
submission_file = 'Submission.csv'
submission = pd.DataFrame(index=np.concatenate(idx_final),
                          columns=cols,
                          data=np.concatenate(prediction_final))

# write file
submission.to_csv(submission_file,index_label='id',float_format='%.3f')

In [86]:
import plotly.plotly as py 
import plotly.graph_objs as go
from plotly.graph_objs import *
from matplotlib import cm

py.sign_in('myself0116', '5KHVlvJQsZkxxLpkSk32')
colors = cm.bwr(range(1,13))
traces =[]
names_x = ['subject%d(%.3f+/-%.3f)' % (i, np.mean(auc_score,axis=1)[i-1], np.std(auc_score,axis =1)[i-1]) for i in range(1,13)]
names_col = ['%s \n(%.3f+/-%.3f)' % (cols[i], np.mean(auc_score,axis=0)[i], np.std(auc_score,axis =0)[i]) for i in range(6)]
for i in range(len(subjects)):
    traces.append(go.Scatter(
        y = auc_score[i,:],
        x = names_col,
        mode = 'lines+markers',
        name = names[i],
     #   line=dict(color = colors[i]),
        
    ))


layout = Layout(title = 'ElasticNet Classification (AUC_ROC = %.3f)' % np.mean(auc_score),
                yaxis=dict(title='AUC of ROC',
                          range = [0.5,1]),
                )

fig = go.Figure(data=traces, layout = layout)
py.iplot(fig, filename='ElasticNet Classification')


Looks like you used a newline character: '\n'.

Plotly uses a subset of HTML escape characters
to do things like newline (<br>), bold (<b></b>),
italics (<i></i>), etc. Your newline characters 
have been converted to '<br>' so they will show 
up right on your Plotly figure!

