In [None]:
%matplotlib inline  
%reload_ext autoreload
%autoreload 2

import matplotlib as mpl
# Embed fonts in PDF output as Type 42 (TrueType) rather than the default
# Type 3, so that text in exported figures stays editable in vector editors.
mpl.rcParams['pdf.fonttype'] = 42

import numpy as np
import pandas as pd
import seaborn as sns

from access_biology_data import meta
from access_literature_data import medline
from access_science_shared import standardizer

import matplotlib.pyplot as plt

In [None]:
import sys

# Make the project-local modules importable. The path is relative, so it is
# resolved against the current working directory of the kernel.
sys.path.append('./../src/')
import nar170604f_occurences as nar_attention
import nar170605f_funding as nar_funding

import resci_inout as inout
import resci_tools as ret

# NOTE: same module as `nar_attention` above, bound to a second alias.
import nar170604f_occurences as nar

In [None]:
# Apply seaborn's plot styling with all font sizes doubled.
sns.set(font_scale=2)

In [None]:
# NCBI taxonomy identifier of the organism under study (9606: Homo sapiens).
taxon_id = 9606

# 'orp' selects genes with official nomenclature, research, protein-coding.
ref_genes = standardizer.reference_genes(taxon_id, 'orp')

In [None]:

# Gene <-> publication links for research papers of this taxon.
gene2pubmed = medline.gene2pubmed(
    taxon_id, ['pubmed_id', 'gene_ncbi'], paper_kind='research')

# Keep only the links whose gene is among the reference genes.
gene2pubmed = gene2pubmed.loc[gene2pubmed['gene_ncbi'].isin(ref_genes)]

# Per-gene counts; later cells read the 'papers' and 'attention' columns.
papers = nar.count_papers_and_attention(ref_genes, gene2pubmed)

In [None]:
# Name of the prediction experiment whose input features are inspected.
experiment = '170924_human_BioExp_log_papers'

# Resolve the location of the feature table used as input for that experiment.
p = inout.get_internal_path(
    '170924f_predict_human_fame_importance/{}/input/features.csv.gz'.format(experiment))

# Load the feature table, indexed by gene identifier.
features = pd.read_csv(p, index_col='gene_ncbi')

In [None]:
# Every gene with a row in the feature table counts as part of the model.
genes_included_in_model = features.index.tolist()

In [None]:
# Flag each gene (row of `papers`) by whether it has features in the model.
papers.loc[:, 'in_prediction_model'] = papers.index.isin(genes_included_in_model)

In [None]:
# Log10-transform the per-gene counts. np.log10 is applied to the whole column
# at once instead of element by element through Series.apply; the values are
# the same, including -inf for any zero entries.
papers['log_papers'] = np.log10(papers['papers'])
papers['log_attention'] = np.log10(papers['attention'])

In [None]:
# Distribution of log10 attention for genes that are part of the model.
sns.distplot(papers.loc[papers['in_prediction_model'], 'log_attention'])

In [None]:
# Number of genes flagged as included in the prediction model.
papers['in_prediction_model'].sum()

In [None]:
# Total number of genes (rows) in the table.
papers.shape[0]

In [None]:
# Fraction of genes included in the model (the mean of a boolean column).
papers['in_prediction_model'].mean()

In [None]:
# Distribution of log10 attention for genes that are NOT part of the model.
sns.distplot(papers.loc[~papers['in_prediction_model'], 'log_attention'])

In [None]:
# Compare log10 paper counts between genes inside and outside the model,
# then export the figure under a name that carries the experiment.
sns.boxplot(data=papers, x='in_prediction_model', y='log_papers', notch=True)
ret.export_image(
    '171012_fame_of_genes_in_predictive_models/papers_{}.pdf'.format(experiment))

In [None]:
# Compare log10 attention between genes inside and outside the model,
# then export the figure under a name that carries the experiment.
sns.boxplot(data=papers, x='in_prediction_model', y='log_attention', notch=True)
ret.export_image(
    '171012_fame_of_genes_in_predictive_models/attention_{}.pdf'.format(experiment))