## Decoding Subjects from Oscillation Data

...

In [1]:
# Import required libraries/functions
from __future__ import print_function, division
import sys
import random
import numpy as np
import matplotlib.pyplot as plt

import itertools

#from mpl_toolkits.mplot3d import Axes3D

# Import custom code from module om, including general functions and OO code for handling data
sys.path.append('/Users/thomasdonoghue/Documents/GitCode/omegamappin/')

from om.meg.decoding import knn, load_subjs

from om.core.db import OMDB
from om.meg.single import MegData

# Set plots to display inline
%matplotlib inline

In [2]:
# Get database object, set up database to use and check available files
db = OMDB()
# Name of the dataset to query; reused later when loading subjects
dat_source = 'OMEGA'
# Returns the subject numbers that have 'foof' files for this dataset (also printed in the cell output),
#  plus a second value kept in `source` that is not used again in the visible notebook
sub_nums, source = db.check_dat_files('foof', dat_source)


Number of Subjects available: 25

Subject numbers with FOOF data available: 
[111802, 124340, 195121, 215187, 218131, 220216, 245518, 265922, 320548, 339964, 369737, 386068, 390845, 403435, 405716, 431088, 504997, 548027, 559176, 604600, 704718, 704814, 719634, 756575, 978341]



## KNN Classification

The following KNN classification is trained with oscillations from a group of subjects, where the test question is: given a single oscillation from a hold-out test set, can we decode which subject that oscillation comes from?

Features:
- This is using 3 features: center frequency, power and bandwidth, from all oscillations (not band specific)

Note:
- This approach is using data from within a single run, for each subject.
- Given this, this analysis is basically asking: are individual subjects' oscillations idiosyncratic enough that, given a new oscillation from the same run, we can guess which subject it comes from?
- This isn't really how we want to be decoding, but having data pulled from separate epochs requires a significant amount of re-organizing and re-computing data that is not ready yet. 

In [6]:
# Enumerate every possible group of `group_size` subjects from the available subject numbers
group_size = 3
combinations = list(itertools.combinations(sub_nums, group_size))

# Get a random sample of possible combinations
#  A dedicated, seeded generator keeps the selection reproducible across kernel restarts,
#  without altering the global `random` state that other code may draw from
n_run = 10
comb_rng = random.Random(0)

#  random.sample raises ValueError when asked for more items than exist, so cap the request
rand_inds = comb_rng.sample(range(len(combinations)), min(n_run, len(combinations)))
comb_run = [combinations[i] for i in rand_inds]

print(len(comb_run))

10


In [None]:
# Run the KNN decoding repeatedly for each sampled group of subjects
n_reps = 100  # number of repeated knn runs averaged per group

all_results = []
for comb in comb_run:

    # Load the subjects of the current group, then average the knn result over the repeats
    subjs = load_subjs(comb, dat_source)
    cur_results = [knn(subjs) for _ in range(n_reps)]
    all_results.append(np.mean(cur_results))

# Average across groups; chance level is one over the number of subjects per group
#  (true division here relies on `from __future__ import division` in the imports cell)
grand_avg = np.mean(all_results)
chance = 1 / group_size

In [None]:
# Report the grand-average result next to the chance level, both scaled to percentages
summary_msg = 'Overall performance is {:4.2f}%, with chance performance of {:4.2f}%'
print(summary_msg.format(grand_avg*100, chance*100))

## TESTING

In [None]:
# Set number of points to plot
#n_points = 15000

# Get a random sample of points to plot
#inds_1 = random.sample(range(meg_subj_1.n_oscs), n_points)
#inds_2 = random.sample(range(meg_subj_2.n_oscs), n_points)

In [None]:
# 3D scatter of the sampled oscillations for two subjects (second subject in red)
# NOTE(review): meg_subj_1/meg_subj_2 and inds_1/inds_2 are not defined in the visible notebook
#  (the inds_* definitions are commented out) - they must be created before this cell can run

# Importing Axes3D registers the '3d' projection on matplotlib versions that do not do so automatically
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection='3d')

# Successive scatter calls on the same axes overlay by default, so plt.hold() is not needed:
#  called without arguments it toggled the hold state, and it no longer exists in matplotlib 3.x
ax.scatter(meg_subj_1.centers_all[inds_1], meg_subj_1.powers_all[inds_1], meg_subj_1.bws_all[inds_1])
ax.scatter(meg_subj_2.centers_all[inds_2], meg_subj_2.powers_all[inds_2], meg_subj_2.bws_all[inds_2], color='r')

# Label axes after the attributes plotted on them
ax.set_xlabel('centers')
ax.set_ylabel('powers')
ax.set_zlabel('bws');

In [None]:
# 2D scatter of centers against bandwidths for two subjects (second subject in red)
# Repeated scatter calls draw onto the same axes by default, so plt.hold() is dropped:
#  called without arguments it toggled the hold state, and it no longer exists in matplotlib 3.x
plt.scatter(meg_subj_1.centers_all[inds_1], meg_subj_1.bws_all[inds_1])
plt.scatter(meg_subj_2.centers_all[inds_2], meg_subj_2.bws_all[inds_2], color='r')

# Label axes after the attributes plotted on them
plt.xlabel('centers')
plt.ylabel('bws');

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Sizes passed to the train / test index split for each subject
n_train = 15000
n_test = 50

# Get train and test index collections for each subject, based on its number of oscillations
# NOTE(review): test_train_inds is not imported in the visible notebook (only knn and load_subjs
#  are imported from om.meg.decoding) - confirm where it should come from
s1_train, s1_test = test_train_inds(meg_subj_1.n_oscs, n_train, n_test)
s2_train, s2_test = test_train_inds(meg_subj_2.n_oscs, n_train, n_test)

In [None]:
# Build each subject's training data from the three oscillation features
#  Stacking the features puts one feature per row, so transpose to get features in columns
#  (assumes each *_all attribute is a 1D array - TODO confirm)
s1_dat = np.transpose(np.array([
    meg_subj_1.centers_all[s1_train],
    meg_subj_1.powers_all[s1_train],
    meg_subj_1.bws_all[s1_train],
]))
s2_dat = np.transpose(np.array([
    meg_subj_2.centers_all[s2_train],
    meg_subj_2.powers_all[s2_train],
    meg_subj_2.bws_all[s2_train],
]))

In [None]:
# Stack both subjects' training data vertically: subject 1 rows first, then subject 2 rows
all_dat = np.vstack([s1_dat, s2_dat])

In [None]:
# Subject labels for the training data: n_train ones followed by n_train twos
labels = np.repeat([1, 2], n_train)

In [None]:
# Fit a k-nearest-neighbours classifier (5 neighbours) on the stacked training data and labels
neigh = KNeighborsClassifier(n_neighbors=5)
neigh.fit(all_dat, labels)

In [None]:
# Pick one of subject 1's test indices at random (random.sample returns a one-element list)
rand_ind = random.sample(s1_test, 1)
print(rand_ind)

# Build the feature data for that oscillation, then show the predicted label and class probabilities
# NOTE(review): the sample is drawn from subject 1, so a correct prediction is presumably label 1
test_dat = np.array([meg_subj_1.centers_all[rand_ind], meg_subj_1.powers_all[rand_ind], meg_subj_1.bws_all[rand_ind]]).T
print(neigh.predict(test_dat))
print(neigh.predict_proba(test_dat))

In [None]:
def _predict_each(clf, subj, test_inds):
    """Predict a label for each test oscillation of one subject, one sample at a time.

    Parameters
    ----------
    clf : object
        Fitted classifier exposing a `predict` method.
    subj : object
        Subject data exposing `centers_all`, `powers_all` and `bws_all`,
        each indexable by the entries of `test_inds`.
    test_inds : iterable
        Indices of the oscillations to classify.

    Returns
    -------
    list
        Predicted label for each index, in iteration order.
    """

    preds = []
    for t_i in test_inds:
        # ndmin=2 makes the three feature values a 2D array, the input form predict is given here
        test_dat = np.array([subj.centers_all[t_i], subj.powers_all[t_i], subj.bws_all[t_i]], ndmin=2)
        preds.append(clf.predict(test_dat)[0])

    return preds


# Predict labels for each subject's test oscillations with the fitted classifier
res_1 = _predict_each(neigh, meg_subj_1, s1_test)
res_2 = _predict_each(neigh, meg_subj_2, s2_test)

In [None]:
# Check each subject's predictions against that subject's label (1 or 2) and print the result
# NOTE(review): check_accuracy is not defined or imported in the visible notebook - confirm its source
print(check_accuracy(res_1, 1))
print(check_accuracy(res_2, 2))

In [None]:
import itertools

In [None]:
#sum(1 for i in aa)

In [17]:
# Scratch check of itertools.combinations: a lazy iterator over every 5-item group of the integers 0-9
a = list(range(10))
b = itertools.combinations(a, 5)