In [1]:
import afq_transform as afqt
import numpy as np
import pandas as pd

from collections import defaultdict
from tqdm import tqdm

from sklearn.cross_validation import ShuffleSplit
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score



In [2]:
nodes = pd.read_csv('data/nodes.csv')
targets = pd.read_csv('data/subjects.csv', index_col='subjectID').drop(['Unnamed: 0'], axis='columns')
y = targets['class']
y = y.map(lambda h: int(h == 'ALS')).values
y

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0])

In [3]:
transformer = afqt.AFQFeatureTransformer()
x, groups, cols = transformer.transform(nodes)

In [7]:
scores = defaultdict(list)

rf = RandomForestClassifier()

#crossvalidate the scores on a number of different random splits of the data
for train_idx, test_idx in tqdm(ShuffleSplit(len(x), 100, .3)):
    x_train, x_test = x[train_idx], x[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]
    r = rf.fit(x_train, y_train)
    acc = roc_auc_score(y_test, rf.predict(x_test))
    for grp in np.unique(groups):
        x_shuffled = afqt.shuffle_group(x_test, grp, groups)
        shuff_acc = roc_auc_score(y_test, rf.predict(x_shuffled))
        scores[grp].append((acc-shuff_acc)/acc)

100%|██████████| 100/100 [01:16<00:00,  1.30it/s]


In [18]:
print("Features sorted by their score:")
importances = sorted(
    [(feat, round(np.mean(score), 4)) for 
     feat, score in scores.items()],
    key=lambda x: x[1],
    reverse=True)

for f, s in importances:
    group_name = cols.get_values()[groups == f][0]
    metric = group_name[0]
    tract = group_name[1]
    print('{i:+7.5f} : {t:s}, {m:s}'.format(t=tract, m=metric, i=s))

Features sorted by their score:
+0.03410 : Right Corticospinal, fa
+0.02980 : Right Corticospinal, cl
+0.02790 : Right Corticospinal, rd
+0.01370 : Right SLF, md
+0.01090 : Left Arcuate, rd
+0.00770 : Right Corticospinal, md
+0.00750 : Left SLF, volume
+0.00740 : Left Arcuate, ad
+0.00730 : Left SLF, fa
+0.00670 : Left IFOF, curvature
+0.00670 : Left ILF, curvature
+0.00630 : Right Uncinate, torsion
+0.00520 : Right Arcuate, cl
+0.00480 : Left Arcuate, curvature
+0.00450 : Right Thalamic Radiation, ad
+0.00430 : Left IFOF, cl
+0.00430 : Left IFOF, rd
+0.00420 : Callosum Forceps Minor, torsion
+0.00400 : Left Uncinate, ad
+0.00400 : Right Cingulum Hippocampus, cl
+0.00400 : Right Cingulum Hippocampus, curvature
+0.00390 : Left Corticospinal, curvature
+0.00370 : Left Corticospinal, rd
+0.00350 : Right IFOF, ad
+0.00350 : Callosum Forceps Major, cl
+0.00320 : Left Thalamic Radiation, rd
+0.00320 : Right Arcuate, volume
+0.00310 : Left Uncinate, cl
+0.00290 : Left Corticospinal, volume
+0