In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
! cat {snakemake.input['bed']} | wc -l

In [None]:
def _read_count(path):
    """Return the integer written on the first line of ``path``.

    The file is opened in a ``with`` block so the handle is closed right away
    instead of being left for the garbage collector.

    Raises
    ------
    ValueError
        If the first line is empty or is not a valid integer.
    """
    with open(path) as handle:
        return int(handle.readline())


# One read count per file listed under snakemake.input['read_count'].
num_reads = pd.Series([
    _read_count(path)
    for path in snakemake.input['read_count']
])
num_reads.mean(), num_reads.std()

In [None]:
def _load_benchmarks(paths, method):
    """Concatenate tab-separated benchmark tables and tag them with ``method``.

    Parameters
    ----------
    paths : iterable of str
        Tab-separated files; each must provide a ``max_rss`` column.
    method : str
        Label written to the ``method`` column of every row.

    Returns
    -------
    pandas.DataFrame
        All rows of all files, with the per-file index labels kept as read,
        plus the columns ``method`` and ``max_rss_gb`` (``max_rss / 1000``).
    """
    runs = pd.concat([pd.read_csv(path, sep='\t') for path in paths])
    runs['method'] = method
    # Assumes max_rss is reported in MB so that /1000 gives GB -- TODO confirm units.
    runs['max_rss_gb'] = runs['max_rss'] / 1000
    return runs


df_epicount = _load_benchmarks(snakemake.input['epicount'], 'EpiCount')
df_pyranges = _load_benchmarks(snakemake.input['pyranges'], 'PyRanges')
df_bedtools = _load_benchmarks(snakemake.input['bedtools'], 'bedtools')

# Long-format table used by the plots. Each method's rows are renumbered from 0
# before stacking, so the final reset_index() adds an 'index' column holding the
# within-method row number while the frame itself gets a fresh 0..n-1 index.
per_method = [df_epicount, df_pyranges, df_bedtools]
df = pd.concat([
    frame.reset_index(drop=True)
    for frame in per_method
]).reset_index()

In [None]:
# PyRanges CPU time per one million reads. Runs are paired with read counts by
# position (assumes both inputs are listed in the same order -- TODO confirm).
reads_in_millions = num_reads.to_numpy() / 1_000_000
cpu_per_1m = df_pyranges['cpu_time'].to_numpy() / reads_in_millions

# cpu_per_1m is a NumPy array, whose .std() defaults to ddof=0. Pass ddof=1 so
# the spread is a sample standard deviation like the pandas .std() calls used
# for the other summaries in this notebook.
cpu_per_1m.mean(), cpu_per_1m.std(ddof=1)

In [None]:
# EpiCount CPU time per one million reads. Runs are paired with read counts by
# position (assumes both inputs are listed in the same order -- TODO confirm).
reads_in_millions = num_reads.to_numpy() / 1_000_000
cpu_per_1m = df_epicount['cpu_time'].to_numpy() / reads_in_millions

# cpu_per_1m is a NumPy array, whose .std() defaults to ddof=0. Pass ddof=1 so
# the spread is a sample standard deviation like the pandas .std() calls used
# for the other summaries in this notebook.
cpu_per_1m.mean(), cpu_per_1m.std(ddof=1)

In [None]:
# bedtools CPU time per one million reads. Runs are paired with read counts by
# position (assumes both inputs are listed in the same order -- TODO confirm).
reads_in_millions = num_reads.to_numpy() / 1_000_000
cpu_per_1m = df_bedtools['cpu_time'].to_numpy() / reads_in_millions

# cpu_per_1m is a NumPy array, whose .std() defaults to ddof=0. Pass ddof=1 so
# the spread is a sample standard deviation like the pandas .std() calls used
# for the other summaries in this notebook.
cpu_per_1m.mean(), cpu_per_1m.std(ddof=1)

In [None]:
# Mean and standard deviation of the EpiCount cpu_time column.
epicount_cpu = df_epicount['cpu_time']
epicount_cpu.mean(), epicount_cpu.std()

In [None]:
# Mean and standard deviation of the PyRanges cpu_time column.
pyranges_cpu = df_pyranges['cpu_time']
pyranges_cpu.mean(), pyranges_cpu.std()

In [None]:
# Mean and standard deviation of the bedtools cpu_time column.
bedtools_cpu = df_bedtools['cpu_time']
bedtools_cpu.mean(), bedtools_cpu.std()

In [None]:
# CPU time per method: box plot with every individual run drawn on top.
fig, ax = plt.subplots(figsize=(3, 3))

sns.boxplot(data=df, x='method', y='cpu_time', ax=ax)
sns.swarmplot(data=df, x='method', y='cpu_time', color='black', ax=ax)

# The method names on the ticks already label the x axis.
ax.set_xlabel('')
ax.set_ylabel('CPU time (seconds)')
sns.despine()

fig.savefig(snakemake.output['fig_cpu'], dpi=300, bbox_inches='tight')

In [None]:
# Peak-memory ratio EpiCount / PyRanges, paired run-by-run by position
# (assumes both inputs are listed in the same order -- TODO confirm).
# The per-method frames are concatenated without ignore_index, so their index
# labels may repeat. Dividing the two Series directly would pair rows by those
# labels, which can silently mis-pair rows when the run counts differ. Dividing
# the underlying arrays pairs by position and fails loudly on a length mismatch.
# Wrapping the result in a Series keeps pandas' sample (ddof=1) .std().
ratio = pd.Series(
    df_epicount['max_rss'].to_numpy() / df_pyranges['max_rss'].to_numpy()
)

ratio.mean(), ratio.std()

In [None]:
# Mean and standard deviation of the EpiCount max_rss_gb column.
epicount_mem = df_epicount['max_rss_gb']
epicount_mem.mean(), epicount_mem.std()

In [None]:
# Mean and standard deviation of the PyRanges max_rss_gb column.
pyranges_mem = df_pyranges['max_rss_gb']
pyranges_mem.mean(), pyranges_mem.std()

In [None]:
# Mean and standard deviation of the bedtools max_rss_gb column.
bedtools_mem = df_bedtools['max_rss_gb']
bedtools_mem.mean(), bedtools_mem.std()

In [None]:
# Peak memory per method: box plot with every individual run drawn on top,
# shown on a logarithmic y axis.
fig, ax = plt.subplots(figsize=(3, 3))

sns.boxplot(data=df, x='method', y='max_rss_gb', ax=ax)
sns.swarmplot(data=df, x='method', y='max_rss_gb', color='black', ax=ax)

ax.set_yscale('log')
# The method names on the ticks already label the x axis.
ax.set_xlabel('')
ax.set_ylabel('Memory Usage (GB)')
sns.despine()

fig.savefig(snakemake.output['fig_mem'], dpi=300, bbox_inches='tight')