In [None]:
%matplotlib inline  
%reload_ext autoreload
%autoreload 2

import matplotlib as mpl
# Figure export settings, applied globally before any figure is created.
# pdf.fonttype 42 makes matplotlib write TrueType (Type 42) fonts into PDFs
# instead of the default Type 3 fonts.
mpl.rcParams['pdf.fonttype'] = 42
# Use Arial for all figure text (the font must be installed on the machine).
mpl.rcParams['font.family'] = 'Arial'

import numpy as np
import pandas as pd
# import seaborn as sns

from access_literature_data import medline
from access_science_shared import standardizer

import matplotlib.pyplot as plt



In [None]:
import sys
# Extend the module search path with the sibling src/ directory. The path is
# relative to the current working directory, so the imports below presumably
# only resolve when the kernel is started from this notebook's folder.
sys.path.append('./../src/')
import nar170604f_occurences as nar
import resci_tools as ret

In [None]:
# Output switches consulted by the export steps further down:
# figures are only written when save_images is set, tables when save_tables is set.
save_images, save_tables = False, True

In [None]:
# Taxon to analyse; presumably an NCBI taxonomy ID (9606 = Homo sapiens) -- confirm
# against the standardizer / medline helpers.
taxon_id = 9606

# Reference gene set; 'orp' = official nomenclature, literature, protein-coding.
ref_genes = standardizer.reference_genes(
    taxon_id,
    'orp',
)

# Gene <-> publication links for this taxon, restricted to research papers.
ref_gene2pubmed = medline.gene2pubmed(
    taxon_id,
    ['pubmed_id', 'gene_ncbi'],
    paper_kind='research',
)

# Per-gene publication statistics (provides the 'papers' column used later on).
papers = nar.count_papers_and_attention(ref_genes, ref_gene2pubmed)

In [None]:
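# Disabled alternative: derive genes_per_paper from medline.gene2pubmed('all')
# rather than from ref_gene2pubmed (the taxon-restricted table used in the next cell).
# all_gene2pubmed = medline.gene2pubmed('all')
# genes_per_paper = all_gene2pubmed['pubmed_id'].value_counts()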

In [None]:
# Count how many rows of the gene/publication table share each pubmed_id
# (presumably one row per gene linked to the paper -- verify against medline.gene2pubmed).
# The result is indexed by pubmed_id.
genes_per_paper = ref_gene2pubmed.loc[:, 'pubmed_id'].value_counts()

In [None]:
# Attach the per-publication count to every gene/publication row.
# The counts are indexed by pubmed_id, hence the column-to-index join; a left
# join keeps every row of ref_gene2pubmed.
gene2pubmed = ref_gene2pubmed.merge(
    genes_per_paper.to_frame('genes_per_paper'),
    how='left',
    left_on='pubmed_id',
    right_index=True,
)

In [None]:
# Per-gene summary table: median number of genes per paper across the papers
# that mention the gene, joined with the per-gene publication statistics.
#
# The median is taken with the built-in groupby reduction rather than
# .agg(np.median): handing a NumPy callable to a groupby aggregation is
# deprecated in recent pandas and emits a FutureWarning.
#
# The join keys are inferred from the column names the two frames share
# (presumably only 'gene_ncbi' -- confirm against papers.reset_index()).
master = pd.merge(
    gene2pubmed[['gene_ncbi', 'genes_per_paper']]
    .groupby('gene_ncbi')
    .median()
    .reset_index(),
    papers.reset_index(),
)

In [None]:
# (n_genes, 2) array for the density estimate:
#   column 0 = log10(papers), column 1 = log10(genes_per_paper).
# Only the two plotted columns are selected before taking the logarithm, so
# unrelated columns of `master` (e.g. the gene identifier) are never
# log-transformed and cannot trigger warnings or errors.
data = np.log10(master[['papers', 'genes_per_paper']]).values

In [None]:
import numpy as np
import matplotlib.pyplot as pl
import scipy.stats as st

# Unlabelled 2-D density plot of the per-gene values held in `data`
# (column 0 on the x axis, column 1 on the y axis).
x, y = data[:, 0], data[:, 1]

# Plot window, also used as the extent of the evaluation grid.
xmin, xmax = 0, 3
ymin, ymax = -0.4, 4.5

# Gaussian kernel density estimate, evaluated on a 100 x 100 grid.
xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack((xx.ravel(), yy.ravel()))
values = np.vstack((x, y))
kernel = st.gaussian_kde(values)
f = kernel(positions).reshape(xx.shape)

fig, ax = pl.subplots()
ax.set(xlim=(xmin, xmax), ylim=(ymin, ymax))

# Filled density contours with black contour lines drawn on top.
# (Alternative rendering kept for reference:
#  ax.imshow(np.rot90(f), cmap='Blues', extent=[xmin, xmax, ymin, ymax]))
cfset = ax.contourf(xx, yy, f, cmap='Purples')
cset = ax.contour(xx, yy, f, colors='k')

# Contour labels and axis labels are left disabled in this variant:
# ax.clabel(cset, inline=1, fontsize=10)
# ax.set_xlabel('Y1')
# ax.set_ylabel('Y0')

fig.colorbar(cfset)

# Export before pl.show(), while the figure is still the current one.
if save_images:
    ret.export_image('171102_genes_in_paper_for_different_genes/genes_in_publication.pdf')

pl.show()



In [None]:
# Source table for the density figure: gene id, 'papers' and the per-gene
# median 'genes_per_paper'. Written to disk only when save_tables is set.
out = master.loc[:, ['gene_ncbi', 'papers', 'genes_per_paper']]

if save_tables:
    ret.export_full_frame(
        '171102_genes_in_paper_for_different_genes/data.csv', out, save_index=False)

In [None]:
import numpy as np
import matplotlib.pyplot as pl
import scipy.stats as st

# Labelled variant of the 2-D density plot: same kernel density estimate as the
# unlabelled figure, but with contour-level labels and axis labels drawn.
x = data[:, 0]  # log10(papers)
y = data[:, 1]  # log10(median genes_per_paper)

# Plot window, also used as the extent of the evaluation grid.
xmin, xmax = 0, 3
ymin, ymax = -0.4, 4.5

# Perform the kernel density estimate on a 100 x 100 grid.
xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack([xx.ravel(), yy.ravel()])
values = np.vstack([x, y])
kernel = st.gaussian_kde(values)
f = np.reshape(kernel(positions), xx.shape)

fig = pl.figure()
ax = fig.gca()
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)

# Filled density contours
cfset = ax.contourf(xx, yy, f, cmap='Purples')
## Or kernel density estimate plot instead of the contourf plot
#ax.imshow(np.rot90(f), cmap='Blues', extent=[xmin, xmax, ymin, ymax])
# Contour lines, annotated with their density level
cset = ax.contour(xx, yy, f, colors='k')
ax.clabel(cset, inline=True, fontsize=10)

# Axis labels name the plotted quantities (the previous 'Y1'/'Y0' were
# placeholders that did not describe the data).
ax.set_xlabel('log10(papers)')
ax.set_ylabel('log10(median genes_per_paper)')

pl.colorbar(cfset)

# Export before pl.show(), while the figure is still the current one.
if save_images:
    ret.export_image('171102_genes_in_paper_for_different_genes/genes_in_publication_labelled_contour.pdf')

pl.show()

In [None]:
# Inspect the 2 x n_genes array (row 0 = x, row 1 = y) that was passed to gaussian_kde.
values