In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

In [None]:
# Read the benchmark results CSV whose path is supplied by the Snakemake rule.
bm = pd.read_csv(snakemake.input.results)

# Show the first rows as the cell output for a quick sanity check.
bm.head()

In [None]:
# Total CPU time per (reads, trial): spread the steps into columns (missing
# steps count as 0), add them up row-wise, and also express the total per
# 1,000 reads in "time_rel".
total_time = (
    bm.pivot_table(
        values="cpu_time",
        index=["reads", "trial"],
        columns="step",
        fill_value=0,
    )
    .sum(axis=1)
    .reset_index()
    # The row-wise sum is an unnamed Series, so reset_index labels it 0.
    .rename(columns={0: "time"})
    .assign(time_rel=lambda frame: (frame["time"] / frame["reads"]) * 1000)
)
total_time.head()

In [None]:
def basic_exp( x, a, b ):
    """Evaluate the two-parameter model ``a * x**b``.

    Despite the name, the exponent ``b`` is applied to ``x``, so this is a
    power law rather than an exponential.

    Parameters
    ----------
    x : scalar or array-like
        Value(s) at which to evaluate the model.
    a : float
        Multiplicative coefficient.
    b : float
        Exponent applied to ``x``.

    Returns
    -------
    The model value(s); ``np.power`` broadcasts, so array input gives an
    array of the same shape.
    """
    raised = np.power( x, b )
    return a * raised

# Fit the power-law model to total CPU time vs. number of reads; the returned
# covariance matrix is not used.
fit, _ = curve_fit( basic_exp, xdata=total_time["reads"], ydata=total_time["time"] )

# Sample points at which the fitted curves are drawn. np.logspace expects
# base-10 exponents, so the endpoints must be converted with np.log10; with the
# natural log the grid ran from ~41 up to ~2.6e15 and only about a third of the
# 100 samples fell inside the plotted x-range.
xs = np.logspace( np.log10(5), np.log10(5000000), 100 )
# basic_exp is built on np.power, so it can be evaluated on the whole grid at once.
ys = basic_exp( xs, *fit )

fig, ax = plt.subplots( dpi=200, figsize=(12, 3.5), ncols=3, sharex=True )

# --- Panel 1: total CPU time -------------------------------------------------
ax[0].scatter( "reads", "time", data=total_time, s=10, zorder=100, c="#009E73" )
ax[0].plot( xs, ys, color="black", linewidth=1, linestyle="--", zorder=50 )
# The x-axis is shared (sharex=True), so the log scale and limits set on the
# first panel apply to the other panels too.
ax[0].set_xscale( "log" )
ax[0].set_ylabel( "CPU time (seconds)",fontweight="bold" )
ax[0].set_xticks( [100, 1000, 10000, 100000, 1000000] )
ax[0].set_xlim(50, 5000000)
ax[0].set_ylim(0, 35)

# --- Panel 2: CPU time per 1,000 reads ---------------------------------------
fit2, _ = curve_fit( basic_exp, xdata=total_time["reads"], ydata=total_time["time_rel"] )
ys2 = basic_exp( xs, *fit2 )

ax[1].scatter( "reads", "time_rel", data=total_time, s=10, zorder=100, c="#009E73" )
ax[1].plot( xs, ys2, color="black", linewidth=1, linestyle="--", zorder=50 )
ax[1].set_ylabel( "CPU time / 1,000 reads (seconds)",fontweight="bold" )
ax[1].set_ylim(0, 60)

# --- Panel 3: peak memory ----------------------------------------------------
# For every (reads, trial) take the largest max_rss over all steps.
mem = bm.pivot_table( index=["reads", "trial"], columns="step", values="max_rss", fill_value=0 ).max(axis=1).reset_index()
# The row-wise max is an unnamed Series, so reset_index labels it 0.
mem = mem.rename( columns={0: "max_rss"} )

fit3, _ = curve_fit( basic_exp, xdata=mem["reads"], ydata=mem["max_rss"] )
ys3 = basic_exp( xs, *fit3 )

ax[2].scatter( "reads", "max_rss", data=mem, s=10, zorder=100, c="#009E73")
ax[2].plot( xs, ys3, color="black", linewidth=1, linestyle="--", zorder=50 )
ax[2].set_ylabel( "Max memory (MB)",fontweight="bold" )
ax[2].set_ylim(0, 450)

# --- Styling common to all three panels --------------------------------------
for panel in ax:
    panel.set_xlabel( "Reads",fontweight="bold" )
    # Light grid drawn beneath the fit line (zorder 50) and the points (zorder 100).
    panel.grid( which="major", axis="both", linewidth=1, color="#F1F1F1", zorder=1 )
    panel.grid( which="minor", axis="both", linewidth=0.5, color="#F1F1F1", zorder=1 )

plt.tight_layout()
# Write the figure to the path declared by the Snakemake rule, then display it inline.
plt.savefig( snakemake.output.benchmark_plots )
plt.show()