In [14]:
import numpy as np
from matplotlib import pyplot as plt

import baseline

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

import scipy.fftpack as F

%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [5]:
# Location handed to baseline.prepare_data (presumably the CIS dataset folder).
# NOTE(review): this is a machine-specific absolute path; point it at your own copy.
cis_data_dir = '/Users/daphne/Dropbox (MIT)/pd-mlhc/CIS'
data = baseline.prepare_data(cis_data_dir)

In [6]:
# Split the seven-element `data` result into individually named pieces.
(
    subject_ids,
    measurement_ids,
    all_data,
    all_n_data,
    on_off_labels,
    dyskinesia_labels,
    tremor_labels,
) = data

In [7]:
# Run baseline.cleaned_data once per label set (tremor, on/off medication,
# dyskinesia -- same order as before) and keep one (X, y) pair for each.
label_sets = (tremor_labels, on_off_labels, dyskinesia_labels)
(X_tre, y_tre), (X_med, y_med), (X_dys, y_dys) = [
    baseline.cleaned_data(all_data, labels) for labels in label_sets
]

In [83]:
# ---------------------------------------------------------------------------
# Configuration: which label set to model, and how subjects are partitioned.
# ---------------------------------------------------------------------------
chosen = 'medication'
X_chosen, y_chosen = X_med, y_med

# Original note: there are 13 patients -- which go in train, which in val?
# Each entry of subject_ids gets one uniform draw (fixed seed, so the split is
# repeatable); a draw below `rate` sends the subject to train, otherwise to val.
np.random.seed(123)
rate = 0.7
train, val = [], []
for subj in subject_ids:
    (train if np.random.rand() < rate else val).append(subj)
# ---------------------------------------------------------------------------

# Per-partition containers: axis-0 means, axis-0 variances, the two stacked
# side by side, and the labels.
avgs_train, var_train, both_train, y_train = [], [], [], []
avgs_validate, var_validate, both_validate, y_validate = [], [], [], []

for idx, measurements in enumerate(X_chosen):
    ident = subject_ids[idx]

    # Pick the destination lists once per subject; train takes precedence
    # over val, and a subject in neither list contributes nothing.
    if ident in train:
        avg_out, var_out, both_out, label_out = (
            avgs_train, var_train, both_train, y_train)
    elif ident in val:
        avg_out, var_out, both_out, label_out = (
            avgs_validate, var_validate, both_validate, y_validate)
    else:
        continue

    # One feature row per measurement of this subject.
    for measurement in measurements:
        mean_vec = np.mean(measurement, axis=0)
        var_vec = np.var(measurement, axis=0)
        avg_out.append(mean_vec)
        var_out.append(var_vec)
        both_out.append(np.hstack((mean_vec, var_vec)))

    # Labels for all of this subject's measurements are appended in one go.
    label_out.extend(y_chosen[idx])


# TODO: do fourier transform... the above is not good enough
# TODO: don't forget - sliding window, regressing against time from event!

In [84]:
# Fit logistic regression on the stacked mean/variance features, then report
# clf.score on the training partition followed by the validation partition.
clf = LogisticRegression(solver='lbfgs', multi_class='auto')
clf.fit(both_train, y_train)

train_score = clf.score(both_train, y_train)
print('Performance on training data for {} labels is {:.3f}'.format(chosen, train_score))

val_score = clf.score(both_validate, y_validate)
print('Performance on validation data for {} labels is {:.3f}'.format(chosen, val_score))

Performance on training data for medication labels is 0.443
Performance on validation data for medication labels is 0.536


In [88]:
# Same evaluation as for logistic regression, with a sigmoid-kernel SVC:
# fit on the training features, then print clf.score for train and validation.
clf = SVC(gamma='scale', kernel='sigmoid')
clf.fit(both_train, y_train)

for msg, X_part, y_part in (
    ('Performance on training data for {} labels is {:.3f}', both_train, y_train),
    ('Performance on validation data for {} labels is {:.3f}', both_validate, y_validate),
):
    print(msg.format(chosen, clf.score(X_part, y_part)))

Performance on training data for medication labels is 0.340
Performance on validation data for medication labels is 0.462


In [18]:
def do_fft(signal, T=.02):
    """Return the single-sided amplitude spectrum of a uniformly sampled signal.

    The FFT is taken along axis 0, the same axis that defines the sample
    count ``N``, so a 1-D signal and a 2-D array with one column per channel
    are both handled (each column is transformed independently).

    Parameters
    ----------
    signal : ndarray
        Samples along axis 0; any further axes are carried through unchanged.
    T : float, optional
        Seconds per sample. Defaults to 0.02.

    Returns
    -------
    xf : ndarray, shape (N // 2,)
        Frequency axis, evenly spaced from 0 to ``1 / (2 * T)``.
    amplitude : ndarray
        ``2 / N * |FFT(signal)|`` for the first ``N // 2`` bins along axis 0.
    """
    N = signal.shape[0]
    # Integer division: np.linspace requires an integer sample count, and the
    # same value keeps xf and the spectrum slice the same length.
    half = N // 2

    # axis=0 matches the axis used for N and for the slice below; the fftpack
    # default (last axis) would transform across channels for 2-D input.
    yf = F.fft(signal, axis=0)
    xf = np.linspace(0.0, 1.0/(2.0*T), half)
    return xf, 2.0/N * np.abs(yf[:half])

In [87]:
def window(signal, time_window=30, fs=1/0.02):
    """Cut a signal into consecutive, non-overlapping windows along axis 0.

    Parameters
    ----------
    signal : ndarray
        Samples along axis 0 (1-D, or 2-D with one column per channel).
    time_window : float, optional
        Window length in seconds. Defaults to 30.
    fs : float, optional
        Sampling rate in samples per second. Defaults to 1/0.02.

    Returns
    -------
    list of ndarray
        Equal-length segments of ``int(time_window * fs)`` samples each.
        Trailing samples that do not fill a whole window are dropped, and an
        empty list is returned when the signal is shorter than one window.

    Raises
    ------
    ValueError
        If ``time_window * fs`` truncates to fewer than one sample.
    """
    samples_per_window = int(time_window * fs)
    if samples_per_window < 1:
        raise ValueError('time_window * fs must be at least one sample')

    num_splits = signal.shape[0] // samples_per_window
    if num_splits == 0:
        return []

    # Slice with an explicit positive stop: a negative-remainder stop turns
    # into [:-0] (an empty slice) when the length is an exact multiple of the
    # window. Slicing only axis 0 also keeps 1-D input working.
    usable = num_splits * samples_per_window
    return np.split(signal[:usable], num_splits, axis=0)


In [93]:
# Window the first measurement of the first tremor subject, then collect the
# axis-0 mean and variance of every window.
sigs_list = window(X_tre[0][0])
avg_segs, var_segs = [], []
for segment in sigs_list:
    avg_segs.append(np.mean(segment, axis=0))
    var_segs.append(np.var(segment, axis=0))