In [None]:
%matplotlib inline 
%reload_ext autoreload
%autoreload 2

src_dir = './../src/'
import sys
sys.path[0] = src_dir

import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['font.family'] = 'Arial'

In [None]:
# Stretch the notebook container to the full browser width.
# `display` and `HTML` are part of the public IPython.display module;
# importing `display` from IPython.core.display is deprecated (since
# IPython 7.14) and no longer works on newer IPython releases.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
import glob
import os

import numpy as np
import pandas as pd
import seaborn as sns

import matplotlib.pyplot as plt

from scipy.stats import spearmanr

import resci_inout as rinout
import resci_tools as ret

import nar170830f_predictions as forec

from access_biology_data import annotation
from access_mixed_data import genealacart
from access_reagent_data import drugbank
from access_science_shared import standardizer

In [None]:
# Category selectors handed to drugbank.genes_2_drugs_and_status (second
# argument) when the per-gene drug tables are loaded further down.
drug_categories = ['all', 'pharmacologically_active']

In [None]:
# Short label -> project-internal path of a stored prediction run.
# The label becomes part of the exported figure's file name; the path is
# resolved through rinout.get_internal_path before loading.
year_models = dict(
    BioExpSoloPaper='170904f_predict_year_of_initial_publication/170904_human_BioExp_first_solo_year/zgbrh_p90_e300',
    BioExpYHoSoloPaper='170904f_predict_year_of_initial_publication/170904_human_BioExpYea_first_solo_year/zgbrh_p90_e300',
    BioExpAnyPaper='170904f_predict_year_of_initial_publication/170904_human_BioExp_first_year/zgbrh_p90_e300',
    BioExpYHoAnyPaper='170904f_predict_year_of_initial_publication/170904_human_BioExpYea_first_year/zgbrh_p90_e300',
)

# Disease definition label -> gene/disease annotation table. Only the
# 'gene_ncbi' column of each table is used below, to select the genes that
# have a disease annotation.
# 9606 is presumably the NCBI taxonomy id for human (the model paths are
# "human" runs) -- TODO confirm.
# NOTE(review): the keyword is spelled `add_absenece` here; presumably this
# matches the signature in access_biology_data -- verify before "fixing" it.
diseases = dict(
    UnifiedDisease=annotation.disease_genealacart(9606, add_absenece=False),
    Omim=annotation.omim_genealacart(9606, add_absenece=False),
)

# Values of the DrugBank 'status' column for which a separate figure is made.
drugs_states = [
    'approved',
    'experimental',
    'investigational',
]

In [None]:
# Switch for figure export: when True, each figure created in the plotting
# loop is handed to ret.export_image; when False the figures are only
# created and closed again.
save_images = True

In [None]:
# Set of gene identifiers found in the 'ExternalIdentifiers' GeneALaCart
# dataset (its 'EntrezGene_x' column, renamed to 'gene_ncbi'). Used below to
# restrict the predictions to genes covered by that dataset.
genealacart_ids = set(
    genealacart.load_genealacart_dataset('ExternalIdentifiers')
    .rename(columns={'EntrezGene_x': 'gene_ncbi'})['gene_ncbi']
)

In [None]:
# For every prediction model and every disease definition:
#   1. keep the genes that are in the GeneALaCart id set and have a disease
#      annotation,
#   2. percentile-rank them by their 'predicted' value and cut the ranks
#      into `amount_of_rank_bins` bins,
#   3. for every drug category / drug status pair, draw a bar plot of the
#      fraction of genes per bin that appear in the matching DrugBank rows,
#      and export it when `save_images` is set.

# Number of rank bins shown on the x axis of every figure.
amount_of_rank_bins = 10

# The DrugBank table depends only on the drug category, so it is loaded once
# per category here instead of once per (model, disease, category, status)
# combination inside the innermost loop.
# NOTE(review): assumes genes_2_drugs_and_status is a plain loader whose
# result does not change between calls -- confirm.
drugbank_by_category = {
    drug_category: drugbank.genes_2_drugs_and_status(9606, drug_category)
    for drug_category in drug_categories
}

for year_model, model_path in year_models.items():
    p = rinout.get_internal_path(model_path)
    df_year = forec.load_predicitions(p)
    df_year = df_year[df_year.index.isin(genealacart_ids)]

    for disease_definition, df_disease in diseases.items():
        # Take an explicit copy of the filtered rows: the columns added
        # below then land on an independent frame, which avoids pandas'
        # SettingWithCopyWarning and leaves df_year untouched for the next
        # disease definition.
        has_disease = df_year.index.isin(df_disease['gene_ncbi'])
        df = df_year[has_disease].copy()

        # Percentile rank in (0, 1] -> bin number in 1..amount_of_rank_bins.
        df['ranked'] = df['predicted'].rank(pct=True)
        df['bin'] = np.ceil(df['ranked'] * amount_of_rank_bins)

        for drug_category in drug_categories:
            for drug_state in drugs_states:

                db = drugbank_by_category[drug_category]
                db = db[db['status'] == drug_state]
                # Overwritten on every pass; only valid for the current
                # (drug_category, drug_state) pair.
                df['has_drug'] = df.index.isin(db['gene_ncbi'])

                # Explicit figure handle instead of the implicit pyplot
                # state. plt.subplots() also makes the new figure the
                # current one, so pyplot-based saving still targets it.
                fig, ax = plt.subplots()
                sns.barplot(
                    x='bin', y='has_drug', data=df, color='salmon', ax=ax)

                if save_images:
                    ret.export_image(
                        '171001_drugs_for_genes_by_discoverability/binned_{}_{}_{}_{}.pdf'.format(
                            year_model,
                            disease_definition,
                            drug_category,
                            drug_state)
                        )

                # Close exactly this figure so open figures do not pile up
                # over the many loop iterations.
                plt.close(fig)