In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell execution (presumably to pick up edits to the
# project modules loaded from ../src further down — confirm).
%load_ext autoreload
%autoreload 2

In [None]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline

In [None]:
import matplotlib as mpl
# Figure-export defaults for the whole notebook.
# pdf.fonttype 42 asks matplotlib to embed TrueType (Type 42) fonts in PDF output
# (presumably so that text stays editable in vector-graphics editors — confirm).
mpl.rcParams['pdf.fonttype'] = 42
# Use Arial for all figure text; this requires Arial to be installed on the machine.
mpl.rcParams['font.family'] = 'Arial'

In [None]:
# Widen the notebook's `.container` element to 80% of the browser window by
# injecting a CSS rule into the page.
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

In [None]:
import glob
import os

import numpy as np
import pandas as pd
import seaborn as sns

import matplotlib.pyplot as plt

In [None]:
from access_biology_data import annotation, gwas_studies, meta, phenotype_collections, properties, relations
from access_literature_data import medline
from access_mixed_data import genealacart
from access_science_shared import standardizer, utils

In [None]:
import sys
sys.path.append('./../src/')

import nar170604f_occurences as nar_attention
import nar170830f_predictions as forec
import resci_inout as rinout
import resci_tools as ret

import nar180310_mega_integrator as mega

In [None]:
# Output switches.
# save_images: when truthy, methods_enrichment() exports each figure via export().
# NOTE(review): save_images is reassigned to True in a later cell, so the value
# set here is not the one in effect when the figures are drawn.
save_images = False
# save_tables: when truthy, the final cell writes the three per-gene tables to CSV.
save_tables = True

In [None]:
# Symmetric limit for the log2 enrichment scores: it is the colour-scale range
# (vmin=-tr, vmax=tr) and clipping bound in methods_enrichment(), and -tr is
# the placeholder for genes without data in the gtx and rnai tables.
tr = 6

In [None]:
# Taxon analysed in this notebook (9606 — presumably the NCBI Taxonomy ID of
# Homo sapiens; confirm).
# NOTE(review): the data-loading cells below pass the literal 9606 instead of
# this variable.
taxon_id = 9606

In [None]:
# NOTE(review): this overrides the `save_images = False` assigned in the
# configuration cell above, so methods_enrichment() WILL export figures.
# Consider keeping a single assignment to avoid hidden state.
save_images = True

In [None]:
# Reference gene set that every per-gene table below is aligned to (via reindex).
# Uses the notebook-level `taxon_id` instead of repeating the literal 9606, so
# the taxon is configured in exactly one place.
# The meaning of the 'orp' selector is defined by standardizer.reference_genes
# and is not visible here — TODO confirm.
ref_genes = standardizer.reference_genes(taxon_id, 'orp')

In [None]:
# Per-gene 2-D layout used as the scatter-plot coordinates in
# methods_enrichment(), which reads its 'x' and 'y' columns.
# Presumably a t-SNE embedding rotated by 45 degrees — confirm against
# mega.load_layout.
tsne_frame = mega.load_layout(rotation_degrees=45)

In [None]:
import nar170605f_funding as nih

In [None]:
# Load the BioGRID interaction table for the taxon under study.
# Uses the notebook-level `taxon_id` instead of repeating the literal 9606.
biogrid = relations.biogrid(taxon_id)

In [None]:
# Keep only the interactions whose 'Throughput' field is exactly 'High Throughput'.
biogrid = biogrid.loc[biogrid['Throughput'] == 'High Throughput']

In [None]:
# Count how often each gene appears as 'Entrez Gene Interactor B' in the
# filtered BioGRID table. Occurrences as 'Entrez Gene Interactor A' are not
# counted (that term was commented out in the original cell). The counts are
# then aligned to the reference genes; genes without any record get 0.
c = (
    biogrid['Entrez Gene Interactor B']
    .value_counts()
    .reindex(ref_genes)
    .fillna(0)
    .to_frame('occurence')
)
c.index.name = 'gene_ncbi'

In [None]:
# log2 ratio of each gene's occurrence count to the mean count over all
# reference genes. Genes with zero occurrences evaluate to log2(0) = -inf;
# methods_enrichment() clips those to -tr before plotting.
c['enrichment'] = np.log2(c['occurence'].div(c['occurence'].mean()))

In [None]:
# Keep the BioGRID table under a dedicated name. This binds the same DataFrame
# object as `c` (no copy is made).
c_biogrid = c

In [None]:
# Load the "gtx" data via the project integrator. Only the first element of the
# result is used below, and only its 'gtx_fraction' column.
# What mega.gtx() returns beyond that is not visible here — TODO confirm.
a = mega.gtx()

In [None]:
# Take the per-gene 'gtx_fraction' column as a one-column frame.
# The explicit copy makes c_gtx independent of a[0], so the 'enrichment' column
# added in the next cell is written to c_gtx itself rather than to a slice of
# a[0] (which triggers pandas' SettingWithCopyWarning).
c_gtx = a[0][['gtx_fraction']].copy()

In [None]:
# log2 ratio of each gene's gtx_fraction to the mean gtx_fraction over the
# genes currently in c_gtx (computed before aligning to the reference genes).
c_gtx['enrichment'] = np.log2(c_gtx['gtx_fraction'].div(c_gtx['gtx_fraction'].mean()))

In [None]:
# Align to the reference genes. Genes without a value get the placeholder
# enrichment -tr (the lower end of the colour scale in methods_enrichment()).
# Only 'enrichment' is filled: a blanket fillna(-tr) would also write -tr into
# 'gtx_fraction', putting an impossible negative fraction into the exported
# table. Missing fractions stay NaN instead.
c_gtx = c_gtx.reindex(ref_genes)
c_gtx['enrichment'] = c_gtx['enrichment'].fillna(-tr)

In [None]:
# NOTE(review): the value assigned to `a` here is not read by any later cell in
# this notebook — the RNAi table below is rebuilt directly from
# phenotype_collections.genome_rnai. The call also replaces the mega.gtx()
# result previously held in `a`. Unless mega.rnai_phenotypes() is needed for a
# side effect, this cell looks removable — confirm.
a = mega.rnai_phenotypes()

In [None]:
# Genome RNAi phenotype records for the taxon under study. Uses the
# notebook-level `taxon_id` rather than repeating the literal 9606.
he = phenotype_collections.genome_rnai(taxon_id=taxon_id)

# Drop every record of any publication (pubmed_id) that reports at least one
# phenotype containing 'shRNA abundance'.
f = he['phenotype'].str.contains('shRNA abundance')
he = he[~he['pubmed_id'].isin(he.loc[f, 'pubmed_id'])].copy()

# Keep only records with a single, non-empty gene identifier (entries
# containing a comma are presumably multi-gene records — confirm), then make
# the identifier numeric. The copy ensures the column assignment below writes
# to an independent frame rather than to a filtered slice.
he = he[~he['gene_ncbi'].str.contains(',')]
he = he[he['gene_ncbi'] != ''].copy()
he['gene_ncbi'] = he['gene_ncbi'].astype(float)

# Restrict to the reference genes; copy again before adding a column.
he = he[he['gene_ncbi'].isin(ref_genes)].copy()

# A record counts as a phenotype unless its phenotype is literally 'none'.
he['has_phenotype'] = he['phenotype'] != 'none'

g = he[['gene_ncbi', 'has_phenotype']].groupby(['gene_ncbi'])

# Per gene: fraction of records with a phenotype (mean of the boolean flag)
# and the number of records ('studies'). GroupBy.mean() replaces agg(np.mean),
# which is deprecated in current pandas.
d = pd.concat([
    g.mean(),
    g.size().rename('studies')
], axis=1).reset_index()

In [None]:
# Per-gene fraction of RNAi records with a phenotype, indexed by gene_ncbi.
# Alignment to the reference genes happens in a later cell.
c_rnai = d.loc[:, ['gene_ncbi', 'has_phenotype']].set_index('gene_ncbi')

In [None]:
# log2 ratio of each gene's phenotype fraction to the mean fraction over the
# genes in c_rnai. A fraction of 0 evaluates to log2(0) = -inf;
# methods_enrichment() clips those to -tr before plotting.
c_rnai['enrichment'] = np.log2(c_rnai['has_phenotype'].div(c_rnai['has_phenotype'].mean()))

In [None]:
# Align to the reference genes. Genes without RNAi records get the placeholder
# enrichment -tr (the lower end of the colour scale in methods_enrichment()).
# Only 'enrichment' is filled: a blanket fillna(-tr) would also write -tr into
# 'has_phenotype', putting an impossible negative fraction into the exported
# table. Missing fractions stay NaN instead.
c_rnai = c_rnai.reindex(ref_genes)
c_rnai['enrichment'] = c_rnai['enrichment'].fillna(-tr)

In [None]:
# One table with the enrichment score of each platform as a column
# ('biogrid', 'gtx', 'rnai'), joined on the shared gene index.
jo = pd.concat(
    [
        c_biogrid['enrichment'].rename('biogrid'),
        c_gtx['enrichment'].rename('gtx'),
        c_rnai['enrichment'].rename('rnai'),
    ],
    axis=1,
)

In [None]:
def export(file_base):
    """Export a figure as a 300-dpi PNG and as a PDF.

    Both files go to the '180421_explore_experimental_platforms' output
    location under the name `file_base`, with no date/time inserted into the
    file name. Which figure is written is decided by the `ret` export helpers
    (presumably the current matplotlib figure — confirm).

    Parameters
    ----------
    file_base : str
        File name without extension.
    """
    stem = '180421_explore_experimental_platforms/{}'.format(file_base)
    raster_path = stem + '.png'
    vector_path = stem + '.pdf'

    ret.export_raster_image(raster_path, dpi=300, insert_date_time=False)
    ret.export_image(vector_path, insert_date_time=False)




In [None]:
def methods_enrichment(kind):
    """Scatter-plot the gene layout, coloured by one platform's enrichment.

    Merges the notebook-level layout `tsne_frame` (columns 'x' and 'y') with
    column `kind` of the notebook-level table `jo` on their shared columns.
    It then shuffles the rows with a fixed seed, clips the scores to
    [-tr, tr] and draws them with the 'coolwarm' colour map. The figure is
    exported through export(kind) when `save_images` is truthy.

    Parameters
    ----------
    kind : str
        Column of `jo` to display; also used as the plot title and as the
        export file name.
    """
    layout = tsne_frame.reset_index()
    scores = jo[[kind]].reset_index()
    points = pd.merge(layout, scores)

    # Fixed-seed shuffle of the drawing order of the points.
    points = points.sample(frac=1, replace=False, random_state=1)

    # Clamp to the colour-scale range; NaN values are left untouched.
    points = points.assign(**{kind: points[kind].clip(lower=-tr, upper=tr)})

    plt.figure()
    plt.scatter(
        x=points['x'],
        y=points['y'],
        s=3,
        c=points[kind],
        cmap='coolwarm',
        vmin=-tr,
        vmax=tr,
    )
    plt.colorbar()
    plt.title(kind)

    if save_images:
        export(kind)

In [None]:
# Layout coloured by the 'rnai' enrichment column of `jo`.
methods_enrichment('rnai')

In [None]:
# Layout coloured by the 'gtx' enrichment column of `jo`.
methods_enrichment('gtx')

In [None]:
# Layout coloured by the 'biogrid' enrichment column of `jo`.
methods_enrichment('biogrid')

In [None]:
if save_tables:
    # (target path, table) pairs, written in this order.
    # NOTE(review): the gtx table is written to 'gxa_data.csv'; confirm the
    # file name is intended.
    tables = (
        ('180421_explore_experimental_platforms/biogrid_data.csv', c_biogrid),
        ('180421_explore_experimental_platforms/rnai_data.csv', c_rnai),
        ('180421_explore_experimental_platforms/gxa_data.csv', c_gtx),
    )
    for p, frame in tables:
        ret.export_full_frame(p, frame)