In [1]:
import os
import numpy as np
import pandas as pd
import scipy.spatial as sp
import random as rn

In [2]:
# Move into the local data directory so the relative file names read below resolve.
# Raw string: every backslash is literal, so the path no longer mixes escaped and
# unescaped separators, and '\U' is not parsed as a unicode escape on Python 3.
# NOTE(review): machine-specific absolute path - consider making it configurable.
os.chdir(r'C:\Users\John\Documents\Code\Python\nsaba\data_dir')

In [3]:
# Load every input table from the current working directory.

# Tab-delimited text tables
db_table = pd.read_csv('database.txt', sep='\t')
feat_table = pd.read_csv('features.txt', sep='\t')

# Comma-separated *_aba tables
expres_aba = pd.read_csv('MicroArrayExpression.csv')
annot_aba = pd.read_csv('SampleAnnot.csv')
probes_aba = pd.read_csv('Probes.csv')

In [4]:
# Display the unique values of the 'entrez_id' column cast to int.
# NOTE(review): nulls are not filtered before the cast here (the later cell that
# filters with notnull() does so) - confirm the column has no missing values.
probes_aba['entrez_id'].unique().astype(int)

array([      733,       735,       740, ...,    402778,    404266,
       100329135])

In [5]:
# Generating np.array of all Entrez IDs: drop null entries, de-duplicate, cast to int
probes_aba['entrez_id'].dropna().unique().astype(int)

array([      733,       735,       740, ...,    402778,    404266,
       100329135])

In [6]:
# Sanity check: print the (rows, columns) shape of the two text tables.
# A parenthesised single-argument print gives the same output on Python 2
# and also runs on Python 3.
print(feat_table.shape)
print(db_table.shape)

(10903, 3407)
(386455, 13)


In [7]:
# KDTree for efficient MNI coordinate extraction.
# .values replaces DataFrame.as_matrix(), which newer pandas releases no longer provide.
# Columns from position 10 onward are taken as the coordinates.
# NOTE(review): the [1:, ...] slice drops the first sample row, so tree indices are
# offset by one relative to annot_aba - confirm this is intended.
mni_coords = annot_aba.values[1:, 10:].astype(float)
coord_tree = sp.KDTree(mni_coords)

pts = [10, 20, 30]
# The second argument of query() is k, the number of nearest neighbours to return;
# it is a count, so pass an integer (it was previously the float 4.0).
r, i = coord_tree.query(pts, k=4)
print(coord_tree.data[i])

[[  6.1  27.2  32.2]
 [  5.1  27.1  28.6]
 [ 21.8  27.8  40.4]
 [ -8.3  17.2  33. ]]


In [8]:
# Display the repr of the KDTree built from the annotation coordinates.
coord_tree

<scipy.spatial.kdtree.KDTree at 0x26126f60>

In [9]:
# Keep only the rows of db_table whose 'space' column equals 'MNI'
dbmni = db_table.loc[db_table['space'] == 'MNI']

In [10]:
# Display the 'mni_x'..'mni_z' columns as a plain array.
# .values replaces DataFrame.as_matrix(), which newer pandas releases no longer provide.
annot_aba.loc[:, 'mni_x':'mni_z'].values

array([[-29.2,   5.8,  -2.6],
       [-10.1,   5.9,  -8.4],
       [  8.9,   8.5,  -7.4],
       ..., 
       [ 26. , -15.4,  -8.8],
       [ 27.2, -15.4,  -8.8],
       [ 26. , -15.4,  -9.9]])

In [11]:
# DataFrame to be actually saved in Nsaba: MNI rows only, reduced to id/x/y/z.
# The reduction drops the 'space' column, so it is guarded to make re-running
# this cell a no-op instead of failing on the missing column.
if 'space' in db_table.columns:
    db_table = db_table.loc[db_table['space'] == 'MNI', ['id', 'x', 'y', 'z']]

In [12]:
# ID to terms: select the row(s) for one pmid, then keep only the columns whose
# value in the first matching row is greater than zero.
ID = 9185551
df = feat_table.loc[feat_table['pmid'] == ID]
df = df.loc[:, df.iloc[0] > 0]
df

Unnamed: 0,pmid,accounted,anterior,anterior temporal,bilaterally,blood,blood flow,central,cerebral,cerebral blood,...,stimulation,structures,suggest,temporal,temporal lobe,thresholds,tomography,treatment,using,water
2,9185551,0.126286,0.04888,0.112669,0.083018,0.077009,0.111414,0.081773,0.152437,0.113549,...,0.079183,0.0764,0.046337,0.179396,0.253363,0.39559,0.098813,0.092124,0.040626,0.131512


In [13]:
# Term to IDs
# The "> -1" threshold also keeps rows whose value for the term is exactly 0.

term = 'attention'
term_ids_act = feat_table.loc[feat_table[term].gt(-1), ['pmid', term]]
term_ids = term_ids_act['pmid'].values.tolist()
term_ids_act

Unnamed: 0,pmid,attention
0,9106283,0.000000
1,9177231,0.000000
2,9185551,0.000000
3,9256495,0.000000
4,9288642,0.000000
5,9395845,0.000000
6,9405692,0.000000
7,9408106,0.059474
8,9412517,0.000000
9,9465007,0.000000


In [22]:
# IDs to Coords: rows of db_table whose id is in term_ids, plus a KDTree over their x..z columns.
term_coords = db_table.loc[db_table['id'].isin(term_ids)]
# .values replaces DataFrame.as_matrix(), which newer pandas releases no longer provide.
tc = term_coords.loc[:, 'x':'z'].values.astype(float)
ns_coord_tree = sp.KDTree(tc)

# Rebind instead of renaming in place: term_ids_act came from a .loc selection,
# and mutating such a selection in place is what triggers pandas' copy warnings.
term_ids_act = term_ids_act.rename(columns={'pmid': 'id'})
# Name the join key explicitly so the merge cannot silently pick up extra shared columns.
df = term_coords.merge(term_ids_act, on='id')

In [23]:
# Distance cut-off applied to the neighbour distances returned by the query below.
max_rad = 2
# Time a 3-nearest-neighbour query for pts, then show the neighbours closer than max_rad.
# NOTE(review): the recorded output has many rows, so pts was presumably the
# 200-point list rather than the single point when this ran - confirm cell order.
%time r, ind = ns_coord_tree.query(pts, 3)
ns_coord_tree.data[ind[r<max_rad]]

Wall time: 319 ms


array([[ -2.,  20.,  -6.],
       [ -2.,  20.,  -6.],
       [ -2.,  20.,  -8.],
       ..., 
       [ 15., -11.,  12.],
       [ 15., -11.,  12.],
       [ 15., -10.,  12.]])

In [24]:
def ptgen(_):
    """Return one uniform random draw from [-20, 20); the argument is ignored."""
    return np.random.uniform(-20, 20)


# 200 random query points, three coordinates each
pts = [[ptgen(0) for _axis in range(3)] for _ in range(200)]

In [26]:
# Getting activation based on Coordinate
# NOTE!!! : Major speed bottleneck below in Nsaba
# .loc with a boolean row mask and a column label replaces the .ix indexer,
# which newer pandas releases no longer provide; the selected Series is the same.
coords = (6, 6, 5)
mean = df.loc[(df['x'] == coords[0]) & (df['y'] == coords[1]) & (df['z'] == coords[2]), term]

In [27]:
def process(pt):
    """Record the mean ``term`` value at each close neighbour of ``pt``.

    Queries the module-level ``ns_coord_tree`` for the 5 nearest neighbours of
    ``pt``, keeps those whose distance is below the module-level ``max_rad``,
    and for each kept coordinate appends to the module-level list ``act`` the
    mean of column ``term`` over the rows of ``df`` with exactly that x/y/z.

    Parameters
    ----------
    pt : sequence of 3 numbers
        Query point passed straight to ``ns_coord_tree.query``.

    Returns
    -------
    None
        Results are delivered only through the side effect on ``act``.
    """
    r, ind = ns_coord_tree.query(pt, 5)
    coords = ns_coord_tree.data[ind[r < max_rad]]
    for coord in coords:
        at_coord = (df['x'] == coord[0]) & (df['y'] == coord[1]) & (df['z'] == coord[2])
        # .loc replaces the .ix indexer, which newer pandas releases no longer provide.
        act.append(np.mean(df.loc[at_coord, term]))

In [28]:
from concurrent import futures

In [29]:
%%time
# Single Thread
# Reset the shared result list, then call process() once per point in pts;
# process() appends its results to act.

act = []
for pt in pts:
    process(pt)

Wall time: 9.21 s


In [30]:
%%time
# Parallel Threads

act = []
with futures.ProcessPoolExecutor() as pool:
    for mean in map(process, pts):
        act.append(np.mean(mean))


TypeError: unsupported operand type(s) for /: 'NoneType' and 'int'

In [56]:
expre = 'x == %d and y == %d and z == %d' % (coords[0], coords[1], coords[2])
%timeit df.query(expre, engine='numexpr')[term];

100 loops, best of 3: 11.3 ms per loop


In [39]:
# Returns concentric shells of tree points about a centre

def sphere(center=(-2, 6, 3), max_radius=4, tree=None):
    """Group KD-tree points around ``center`` into unit-width concentric shells.

    Parameters
    ----------
    center : sequence of 3 numbers
        Query point. Defaults to the previously hard-coded (-2, 6, 3).
    max_radius : int
        Outermost radius; radii 1..max_radius are queried. Defaults to 4.
    tree : object with ``query_ball_point`` and ``data``, optional
        Tree to search. Defaults to the module-level ``ns_coord_tree``.

    Returns
    -------
    numpy.ndarray of dtype object, shape (max_radius,)
        Element 0 holds the points within radius 1; element k (k >= 1) holds
        the points within radius k + 1 that are not within radius k. Every
        element is a list of coordinate tuples; repeated coordinates collapse
        to one entry because sets are used.

    Raises
    ------
    ValueError
        If ``max_radius`` is smaller than 1.
    """
    if max_radius < 1:
        raise ValueError('max_radius must be at least 1')
    if tree is None:
        tree = ns_coord_tree

    # One ball query per integer radius, innermost first.
    balls = []
    for radius in range(1, max_radius + 1):
        idx = tree.query_ball_point(center, radius)
        balls.append(set(map(tuple, tree.data[idx])))

    # Innermost ball as-is, then each ball minus the one inside it.
    layers = [list(balls[0])]
    for inner, outer in zip(balls[:-1], balls[1:]):
        layers.append(list(outer.difference(inner)))

    # Fill an object array slot by slot: np.array() on the nested lists would
    # return a numeric block when all shells happen to have equal length and
    # is rejected by recent NumPy when they are ragged.
    shells = np.empty(len(layers), dtype=object)
    for k, layer in enumerate(layers):
        shells[k] = layer
    return shells


In [42]:
# Wall-clock time for 800 consecutive calls to sphere().
%time for i in range(800): sphere()

Wall time: 24.7 s


In [43]:
# Wall-clock time for a single sphere() call; its return value is displayed.
%time sphere()

Wall time: 32 ms


array([[], [], [(-2.0, 4.0, 2.0)], []], dtype=object)