# Imports

In [1]:
# General imports
import os
import sys
import pandas as pd

pd.options.mode.chained_assignment = None  # default='warn'
import numpy as np
import subprocess

from sklearn.cluster import KMeans

from tqdm import tqdm
# Other imports
tqdm.pandas()

import json

from pprint import pprint

# Custom utils
sys.path.append("/home/weber/PycharmProjects/EXOTIC/src")
from utils.utils import load_config_file

# Figures imports
import matplotlib

from matplotlib.lines import Line2D   
import matplotlib.patches as mpatches

import random 

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
from statannot import add_stat_annotation
import matplotlib.font_manager as font_manager
import string

from pandarallel import pandarallel

pandarallel.initialize(nb_workers=60, progress_bar=True)

## YAML FILES CONFIG
yaml = load_config_file(config_file="/home/weber/PycharmProjects/EXOTIC/clean/src/config_clean_clean.yaml")

# Read the JSON config through a context manager so the file handle is
# closed as soon as parsing is done instead of being left to the GC.
with open("/home/weber/PycharmProjects/EXOTIC/src/EXOTIC_config.json") as config_handle:
    dicts = json.load(config_handle)


# Font settings
font_dirs = ['/home/weber/Fonts', ]
font_files = font_manager.findSystemFonts(fontpaths=font_dirs)
# `createFontList` is deprecated since Matplotlib 3.2 (see the warning this
# cell used to emit); register each font file with `FontManager.addfont`.
for font_file in font_files:
    font_manager.fontManager.addfont(font_file)

from matplotlib import rcParams
rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = ['Arial']
rcParams['font.weight'] = 'light'
rcParams['axes.facecolor'] = 'white'


  from pandas import Panel


INFO: Pandarallel will run on 60 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


The createFontList function was deprecated in Matplotlib 3.2 and will be removed two minor releases later. Use FontManager.addfont instead.


In [None]:
# plt.style.use('default')
def show_values_on_bars(axs, i=0, fontsize=13, rotation=0, padding=1):
    """Write the height of every bar as a text label above the bar.

    Parameters
    ----------
    axs
        A single axes object, or a ``numpy.ndarray`` of axes objects. Each
        one must expose ``patches`` (the bars) and a ``text`` method.
    i : int
        Label format selector:
        ``0`` -> integer with thousands separator (``"{:,}"``),
        ``2`` -> two decimals (``"{:.2f}"``),
        ``3`` -> three decimals (``"{:.3f}"``).
    fontsize, rotation
        Forwarded to ``ax.text``.
    padding
        Offset added to the top of the bar (``y + height``) to position
        the label.

    Raises
    ------
    ValueError
        If ``i`` is not one of the supported format selectors.
    """
    label_formats = {
        0: lambda height: "{:,}".format(int(height)),
        2: "{:.2f}".format,
        3: "{:.3f}".format,
    }
    if i not in label_formats:
        # Previously an unknown selector left `value` unbound and failed
        # with an UnboundLocalError on the first bar.
        raise ValueError(
            "Unsupported label format selector i={!r}; expected one of {}".format(
                i, sorted(label_formats)
            )
        )
    format_label = label_formats[i]

    def _show_on_single_plot(ax):
        for p in ax.patches:
            height = p.get_height()
            # A bar without a height (NaN) has nothing to label, and
            # int(nan) would raise for the integer format.
            if np.isnan(height):
                continue
            _x = p.get_x() + p.get_width() / 2
            _y = p.get_y() + height + padding
            ax.text(_x, _y, format_label(height), ha="center", fontsize=fontsize, rotation=rotation, color='black')

    if isinstance(axs, np.ndarray):
        for _, ax in np.ndenumerate(axs):
            _show_on_single_plot(ax)
    else:
        _show_on_single_plot(axs)

# Two stacked bar plots of the mean 'Intron_length_max' per 'CDS_count':
# top panel over all rows, bottom panel split by 'Miso_siso'.
f, ax = plt.subplots(nrows=2, figsize=(18,10))

# Rows with at most 30 CDS; filtered once and shared by both panels.
t_max_30_cds = t.loc[t['CDS_count'] <= 30]

# --- Top panel: all rows together ---
sns.barplot(data=t_max_30_cds.groupby('CDS_count')['Intron_length_max'].mean().reset_index(), x='CDS_count', y='Intron_length_max', palette=['grey'], ax=ax[0])

# Hand-placed red reference segments, given as ([x_start, x_end], [y_start, y_end]).
reference_segments = (
    ([0, 8], [9, 21]),
    ([9, 13], [22, 22]),
    ([14, 20], [27, 27]),
    ([21, 28], [35, 35]),
)
for segment_x, segment_y in reference_segments:
    ax[0].plot(segment_x, segment_y, color='red', lw=2, marker='o')

ax[0].set_xlabel('')
ax[0].set_xlim(-0.5,28.5)
ax[0].set_title('Global')

# Kept from the original cell: shows the tick positions of the top panel.
print(ax[0].get_xticks())
show_values_on_bars(ax[0], fontsize=11)


# --- Bottom panel: same statistic, one bar per 'Miso_siso' level ---
sns.barplot(data=t_max_30_cds.groupby(['Miso_siso', 'CDS_count'])['Intron_length_max'].mean().reset_index(), x='CDS_count', y='Intron_length_max', hue='Miso_siso', palette=palette, ax=ax[1])
ax[1].set_xlabel('Number of exons in the gene')
ax[1].legend(title='')
ax[1].set_title('Miso & Siso')

show_values_on_bars(ax[1], fontsize=11)


# Styling shared by both panels, plus a bold panel letter (A, B) placed
# in axes coordinates just outside the top-left corner.
for n, sub_a in enumerate(ax):
    sub_a.spines['right'].set_linewidth(0)
    sub_a.spines['top'].set_linewidth(0)
    sub_a.set_ylabel('Longest intron mean length (kbp)')
    sub_a.set_axisbelow(True)
    sub_a.grid(axis='y')
    sub_a.text(-0.1, 1.05, string.ascii_uppercase[n], transform=sub_a.transAxes, size=35, weight='bold')
plt.tight_layout()