In [None]:
import os
import sys
from glob import glob

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Make the parent directory importable before loading the project-local module
sys.path.append(os.path.abspath('../'))
import metaprofile_across_regions as mar

: 

In [None]:
# Input coverage tables: matches of the DOWN pattern first, then matches of the UP+CONTROL pattern
coverage_files = [
    *glob('../../../results/Metaprofiles/UTR3_termini/merged_replicates/*DOWN_*/*_unsmoothed*.tsv'),
    *glob('../../../results/Metaprofiles/UTR3_termini/merged_replicates/*UP+CONTROL_*/*_unsmoothed*.tsv'),
]

# Sample annotation table (its first column becomes the index)
annot = pd.read_csv(
    '../../../data/general/SampleAnnotation.csv',
    index_col=0,
)

# Output directory for the figures; created when it does not exist yet
outpath = '../../../results/Metaprofiles/UTR3_termini/plots'
os.makedirs(outpath, exist_ok=True)

# Parameters
sw = 20  # Smoothing window

: 

In [None]:
# Display the list of coverage file paths so the matched inputs can be inspected
coverage_files

: 

In [None]:
def smoothDf(df, sw):
    """Smooth the summary columns of a table with a centred triangular rolling mean.

    Only the columns named 'ciMax', 'ciMin' and 'mean' that are present in
    ``df`` are smoothed; every other column is passed through unchanged.
    The rolling mean leaves NaN at both ends of the table, where the window is
    incomplete. Those gaps are filled from the nearest smoothed value
    (forward fill first, then backward fill).

    Parameters
    ----------
    df : pandas.DataFrame
        Table to smooth; rows are processed in their current order.
    sw : int
        Size of the rolling window, in rows.

    Returns
    -------
    pandas.DataFrame
        A smoothed copy of ``df``. The input frame itself is left untouched,
        which keeps the function safe to use on groups of a groupby.
    """
    smoothCols = [c for c in df.columns if c in ('ciMax', 'ciMin', 'mean')]
    smoothed = df.copy()
    if not smoothCols:
        # Nothing to smooth: hand back the unmodified copy
        return smoothed
    # .ffill()/.bfill() replace fillna(method=...) and the explicit axis=0 is
    # dropped from rolling(); both spellings are deprecated in recent pandas.
    smoothed[smoothCols] = (
        df[smoothCols]
        .rolling(sw, center=True, win_type='triang')
        .mean()
        .ffill()
        .bfill()
    )
    return smoothed

: 

In [None]:
# Combine into one table
# Each coverage file is tagged with metadata parsed from its path:
#   Group     - second '_'-separated field of the parent directory name
#   Condition - text after 'cond-' in the parent directory name
#   Norm      - text after 'norm-' in the file name, up to the first '.'
dfList = []
for f in coverage_files:
    # os.path is used instead of splitting on '/' so the parsing does not
    # depend on the path separator glob returns on the current platform.
    dirName = os.path.basename(os.path.dirname(f))
    fileName = os.path.basename(f)
    df = pd.read_csv(f, sep='\t')
    df['Group'] = dirName.split('_')[1]
    df['Condition'] = dirName.split('cond-')[-1]
    df['Norm'] = fileName.split('norm-')[-1].split('.')[0]
    dfList.append(df)
if not dfList:
    raise ValueError('No coverage files to combine; check the glob patterns used to build coverage_files.')
# ignore_index gives the combined table a unique row index instead of
# repeating each file's own 0..n-1 labels.
dfCombined = pd.concat(dfList, ignore_index=True)

: 

In [None]:
# Smooth with a desired window
# Every (Sample, Condition, Norm, Group) profile is smoothed on its own.
# The groups are iterated explicitly rather than passed through groupby.apply:
# depending on the pandas version, apply may prepend the group keys to the
# index (which makes the later merge on 'Sample'/'Condition' ambiguous) or
# withhold the grouping columns from the frame handed to the function.
# Each group is copied so the smoothing never writes into a slice of dfCombined.
dfCombined = pd.concat(
    [
        smoothDf(profile.copy(), sw=sw)
        for _, profile in dfCombined.groupby(['Sample', 'Condition', 'Norm', 'Group'])
    ],
    ignore_index=True,
)

: 

In [None]:
# Annotate samples
# Left join: every coverage row is kept, also when no annotation entry matches
dfCombined = pd.merge(
    dfCombined,
    annot,
    how='left',
    on=['Sample', 'Condition'],
)
dfCombined.head()

: 

In [None]:
# Plot
# Plot LIN28A around PAS in UP / DOWN / CTRL -  libnorm and expr norm side by side
dfPlot = dfCombined.loc[(dfCombined.Protein == 'LIN28A') & (dfCombined.Group != 'naive')]

hue_order = ['S200WT_FCL', 'S200WT_2iL', 'S200A_FCL']
# One tab10 colour per condition, shared by the lines and their CI bands
colorMap = dict(zip(hue_order, sns.color_palette('tab10', len(hue_order))))

g = sns.relplot(
    data=dfPlot, x='Position', y='mean', hue='Condition',
    style='Group', kind='line', col='Norm', facet_kws={'sharey': False, 'sharex': True},
    style_order=['DOWN', 'UP+CONTROL'], hue_order=hue_order,
    col_order=['libsize', 'by_reg'], palette=colorMap
    )

# Only a column facet is used, so every key of axes_dict is the Norm value
# itself (a plain string, not a tuple).
for norm, ax in g.axes_dict.items():
    df = dfPlot.loc[dfPlot['Norm'] == norm]
    ax.axvline(0, color='black', ls='dashed')
    for (condition, group), dft in df.groupby(['Condition', 'Group']):
        if condition not in colorMap:
            # Conditions outside hue_order have no colour assigned; skip their band
            continue
        ax.fill_between(dft.Position, dft.ciMax, dft.ciMin, alpha=0.2, color=colorMap[condition])
    # The y-label depends on the panel only, so it is set once per axis
    if norm == 'libsize':
        ax.set_ylabel('mean CPM across regions of interest\n(CI represents variation between evaluated regions)')
    else:
        ax.set_ylabel('mean CPM per TPM across regions of interest\n(CI represents variation between evaluated regions)')
g.fig.savefig(f'{outpath}/LIN28A_metaprofiles.pdf', bbox_inches='tight')

: 

In [None]:
# Plot PABPC1 and C4 around PAS in UP / DOWN / CTRL -  libnorm and expr norm side by side
dfPlot = dfCombined.loc[(dfCombined.Protein != 'LIN28A') & (dfCombined.Group != 'naive')]

hue_order = ['S200WT_FCL', 'KO_FCL']
# One tab10 colour per condition, shared by the lines and their CI bands
colorMap = dict(zip(hue_order, sns.color_palette('tab10', len(hue_order))))

g = sns.relplot(
    data=dfPlot, x='Position', y='mean', hue='Condition',
    style='Group', kind='line', col='Norm', row='Protein', facet_kws={'sharey': False, 'sharex': True},
    style_order=['DOWN', 'UP+CONTROL'], hue_order=hue_order, col_order=['libsize', 'by_reg'], palette=colorMap,
    )

# With both row and col facets, every key of axes_dict is a (Protein, Norm) tuple
for (protein, norm), ax in g.axes_dict.items():
    df = dfPlot.loc[(dfPlot['Norm'] == norm) & (dfPlot['Protein'] == protein)]
    ax.axvline(0, color='black', ls='dashed')
    for (condition, group), dft in df.groupby(['Condition', 'Group']):
        if condition not in colorMap:
            # Conditions outside hue_order have no colour assigned; skip their band
            continue
        ax.fill_between(dft.Position, dft.ciMax, dft.ciMin, alpha=0.2, color=colorMap[condition])
    # The y-label depends on the panel only, so it is set once per axis
    if norm == 'libsize':
        ax.set_ylabel('mean CPM across regions of interest\n(CI represents variation between evaluated regions)')
    else:
        ax.set_ylabel('mean CPM per TPM across regions of interest\n(CI represents variation between evaluated regions)')
g.fig.savefig(f'{outpath}/PABP_metaprofiles.pdf', bbox_inches='tight')

: 