## GENE ANALYSIS

Gene expression data are taken from the Allen Human Brain Atlas.

First we use their data to estimate gene expression at our cortical locations. We then check which genes correlate with our electrophysiological data.

In [1]:
from __future__ import print_function, division

# Import required libraries
import sys

# Import custom code from module om, and OO code for handling data
# NOTE(review): the path appended below is an absolute, machine-specific location;
#   on another machine it presumably needs to be adjusted (or `om` installed) - confirm.
sys.path.append('/Users/thomasdonoghue/Documents/GitCode/omegamappin/')
from om.maps.tg import MapCompTG
from om.core.db import OMDB
# NOTE(review): `plt` is used by the plotting cell further down but is never imported
#   explicitly in this notebook, so it presumably comes in through this star import - confirm.
from om.plts.maps import *

# Set plots to display inline
%matplotlib inline

In [2]:
# Initialize the OMDB object, and the MapCompTG object that is built from it
#   (`om_maps` is the object used for all loading and correlation calls below)
db = OMDB()
om_maps = MapCompTG(db)

In [3]:
# Check files that are available
#   (prints the oscillation, slope, terms and gene file listings shown below)
db.check_map_files()

Oscillation Files:
 Group_Osc_Scores.npz
Group_Osc_Scores_OLD.npz
Res_Group_85_Osc_prob.p
Res_Group_85_Osc_score.p 

Slope Files:
 Group_Slopes.npz 

Terms Files:
 00-ns_terms.csv
all_terms_r10_gaussian_estimation.csv
all_terms_r5_gaussian_estimation.csv 

Genes Files:
 00-real_gene_names.csv
00-real_gene_names_OLD.csv
avg_gene_estimations
sub1_gene_estimations
sub2_gene_estimations
sub3_gene_estimations
sub4_gene_estimations
sub5_gene_estimations
sub6_gene_estimations 



In [4]:
# Load oscillation maps - here using the oscillation scores
# NOTE(review): the file listing printed by check_map_files() shows
#   'Res_Group_85_Osc_score.p' (lower-case 's'), while the name passed here uses 'Score'.
#   Confirm the loader resolves this, e.g. on a case-sensitive filesystem.
om_maps.load_meg_maps('Res_Group_85_Osc_Score')

In [5]:
# Load slope map
# NOTE(review): the 'Slope Files' listing printed by check_map_files() shows only
#   'Group_Slopes.npz'; confirm how load_slope_map resolves the name 'Res_Group_85'.
om_maps.load_slope_map('Res_Group_85')

In [6]:
# Load gene files
#   ('avg' presumably selects the 'avg_gene_estimations' entry from the gene file listing - confirm)
om_maps.load_gene_maps('avg')

Loading file # 1  of  3
Loading file # 2  of  3
Loading file # 3  of  3
All files loaded!


In [8]:
## Correlate each oscillation band map, and then the slope map, with the gene maps

# Set which method to use
method = 'parallel'

# Queue one (map name, stop_par) pair per oscillation band, followed by the slopes.
#   Only the final (slopes) call passes stop_par=True; the printed log shows the
#   cluster being shut down after that last call.
corr_jobs = [(band_name, False) for band_name in om_maps.bands]
corr_jobs.append(('Slopes', True))

# Run the queued correlations, in order
for map_name, stop_flag in corr_jobs:
    om_maps.calc_corrs('Genes', map_name, method=method, stop_par=stop_flag)

Running in Parallel
Cluster opened
Calculating corrs between Genes and Theta
Running in Parallel
Calculating corrs between Genes and Beta
Running in Parallel
Calculating corrs between Genes and LowGamma
Running in Parallel
Calculating corrs between Genes and Alpha
Running in Parallel
Calculating corrs between Genes and Slopes
Cluster shut down.


In [9]:
# Print out highest correlated genes for each oscillation band
#   (n_check=20 requests twenty entries per band)
for band in om_maps.bands:
    om_maps.check_corrs('Genes', band, n_check=20, top=True)



Correlations for  Genes  &   Theta : 

# 	 Genes                                                   	 R-Vals 	 P-vals 

1 	 twist family bHLH transcription factor 2                	 0.47295 	 0.0000e+00
2 	 kallikrein-related peptidase 5                          	 0.42556 	 4.1820e-285
3 	 spondin 2, extracellular matrix protein                 	 0.42215 	 4.1167e-280
4 	 receptor (chemosensory) transporter protein 1           	 0.42135 	 5.9777e-279
5 	 germ cell associated 1                                  	 0.40952 	 4.0065e-262
6 	 alkaline phosphatase, liver/bone/kidney                 	 0.40372 	 4.0100e-254
7 	 CD6 molecule                                            	 0.39759 	 7.9018e-246
8 	 protein phosphatase, EF-hand calcium binding domai      	 0.39319 	 5.3649e-240
9 	 Rho GTPase activating protein 28                        	 0.38812 	 2.2997e-233
10 	 proline-rich acidic protein 1                           	 0.38444 	 1.2110e-228
11 	 NIMA-related kinase 2             

In [10]:
# Save calculated correlations to file
#   (intentionally disabled - uncomment the two lines below to write the band correlations out)
#for band in om_maps.bands:
#    om_maps.save_corrs('Genes', band, 'Gr_85', save_as_npz=True, save_as_csv=True)

In [11]:
# Print out the highest correlated genes for slopes
# NOTE(review): unlike the per-band call, `top` is not passed here, so this relies on the
#   default of check_corrs, which is not visible in this notebook - confirm.
om_maps.check_corrs('Genes', 'Slopes', n_check=20)



Correlations for  Genes  &   Slopes : 

# 	 Genes                                                   	 R-Vals 	 P-vals 

1 	 tetratricopeptide repeat domain 21B                     	 0.57465 	 0.0000e+00
2 	 ST3 beta-galactoside alpha-2,3-sialyltransferase 6      	 0.54371 	 0.0000e+00
3 	 collagen, type V, alpha 1                               	 0.52230 	 0.0000e+00
4 	 uridine phosphorylase 1                                 	 0.52062 	 0.0000e+00
5 	 synaptonemal complex protein 2                          	 0.51199 	 0.0000e+00
6 	 follistatin-like 1                                      	 0.50956 	 0.0000e+00
7 	 Ras association (RalGDS/AF-6) domain family member      	 0.50804 	 0.0000e+00
8 	 Abelson helper integration site 1                       	 0.50531 	 0.0000e+00
9 	 ankyrin repeat and SOCS box containing 13               	 0.50482 	 0.0000e+00
10 	 solute carrier family 25 (aspartate/glutamate carr      	 0.50406 	 0.0000e+00
11 	 bone morphogenetic protein/retinoic acid i

In [None]:
# Save calculated correlations to file
#   (intentionally disabled - uncomment the line below to write the slope correlations out)
#om_maps.save_corrs('Genes', 'Slopes', 'Gr_85', save_as_npz=True, save_as_csv=True)

## Plots of individual correlations

In [None]:
# Set data: which gene and which oscillation band to plot against each other
#   gene_name is looked up in om_maps.gene_names, and cur_band is used as a key into
#   om_maps.meg_maps, by the cells that follow
gene_name = 'potassium channel tetramerization domain containing 9 pseudogene 6'
cur_band = 'Beta'

In [None]:
# Pull out the MEG map for the selected band
meg_dat = om_maps.meg_maps[cur_band]

# Find the position of the selected gene (list.index raises ValueError if it is absent)
gene_ind = om_maps.gene_names.index(gene_name)

# Pull out the expression values for that gene as an array.
#   `.values` replaces `.as_matrix()`, which was deprecated in pandas 0.23 and removed
#   in pandas 1.0; `.values` is available in both old and new pandas versions.
gene_dat = om_maps.gene_maps[gene_ind].values

# Pull out the previously computed correlation for this gene & band
#   (the first element of the stored entry is used as the r-value)
r_val = om_maps.corrs['Genes'][cur_band][gene_ind][0]

In [None]:
import random

In [None]:
# Set number of points to plot
n_points = 1000

# Set whether to save the figure out to file
save_out = True

# Plot settings
alpha_val = 0.18
axis_fs = 16
lw = 2.5

# Get a random sample of points to plot
#   The population size is taken from the data rather than hard-coded, and the sample
#   size is capped so random.sample is never asked for more points than are available
n_verts = min(len(gene_dat), len(meg_dat))
inds = random.sample(range(n_verts), min(n_points, n_verts))

# Initialize plot
fig, ax = plt.subplots()

# Draw scatter plot
ax.scatter(gene_dat[inds], meg_dat[inds], color='#173570', marker='o', alpha=alpha_val)

# Set axis limits: pad the full data range by 5% on each side
#   (limits use all of the data, not only the sampled points)
meg_min, meg_max = min(meg_dat), max(meg_dat)
gene_min, gene_max = min(gene_dat), max(gene_dat)

space_meg = 0.05 * (meg_max - meg_min)
space_gene = 0.05 * (gene_max - gene_min)

min_x = gene_min - space_gene
max_x = gene_max + space_gene
min_y = meg_min - space_meg
max_y = meg_max + space_meg

ax.set_xlim([min_x, max_x])
ax.set_ylim([min_y, max_y])

# Add axis labels
ax.set_xlabel('Gene Expression', fontdict={'fontsize': axis_fs})
ax.set_ylabel('Oscillation Score', fontdict={'fontsize': axis_fs})

# Set the top and right side frame & ticks off
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')

# Set linewidth of remaining spines
ax.spines['left'].set_linewidth(lw)
ax.spines['bottom'].set_linewidth(lw)

# Turn off other axis ticks
ax.xaxis.set_ticks([])
ax.yaxis.set_ticks([])

# Annotate the plot with the r-value, placed 10% in from the left edge of the x-range
#   0.75 is written as a float so the position does not depend on the
#   `from __future__ import division` import made in the first cell
x_range = max_x - min_x
ax.text((max_x-0.90*x_range), (0.75*max_y), 'r = ' + '{:4.4f}'.format(r_val), {'fontsize': 18, 'fontweight':'bold'})

# Save out the figure, if requested
#   Gene names can contain '/', which would be read as a directory separator in the
#   file name, so it is replaced before building the save name
if save_out:
    save_name = cur_band + '_' + gene_name.replace('/', '-') + '.pdf'
    fig.savefig(save_name, format='pdf', bbox_inches='tight', dpi=300)

### Calculate Average Gene Results

In [7]:
# Import the function used to average gene maps across subjects
# NOTE(review): the module path here is 'om.mapsps.tg', whereas the first cell imports
#   MapCompTG from 'om.maps.tg' - 'mapsps' is possibly a typo; confirm against the package.
from om.mapsps.tg import calc_avg_gene_map

In [None]:
# Build the labels of the six gene-subjects ('sub1' through 'sub6')
subj_list = ['sub' + str(subj_num) for subj_num in range(1, 7)]

# Average the gene expression estimates across all of those subjects
calc_avg_gene_map(subj_list, 'All')