In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from statsmodels.stats.proportion import proportion_confint

In [None]:
# Load the results table whose path is supplied by the Snakemake rule.
res = pd.read_csv( snakemake.input.results )

# Capture the number that follows the slash inside the parentheses of "result",
# i.e. the Y of a "<NAME>(X/Y)" token, and store it as a numeric column.
placement_pattern = r"[A-Z0-9.]+\([0-9]+\/([0-9]+)\)"
placement_text = res["result"].str.extract( placement_pattern, expand=False )
res["parsimony_placements"] = pd.to_numeric( placement_text )

res.head()

In [None]:
# For each value of "fraction_missing": how many non-null "correct" entries
# there are (observations) and their sum (successes).
summ = res.groupby( "fraction_missing" )["correct"].agg( ["count", "sum"] )
summ.columns = ["observations", "successes"]
summ = summ.reset_index()
summ["accuracy"] = summ["successes"] / summ["observations"]

# Jeffreys interval at alpha=0.05 for every group. proportion_confint accepts
# array-like counts, so the whole columns are passed in one call instead of
# building a Series per row with DataFrame.apply.
summ["accuracy_low"], summ["accuracy_high"] = proportion_confint(
    summ["successes"], summ["observations"], alpha=0.05, method="jeffreys"
)
summ.head()

In [None]:
fig, ax = plt.subplots( figsize=(5, 4), dpi=200 )

# Accuracy as a line, with the interval bounds shaded just beneath it.
ax.plot( "fraction_missing", "accuracy", data=summ, color="black", zorder=100 )
ax.fill_between(
    "fraction_missing", "accuracy_low", "accuracy_high",
    data=summ, color="black", linewidth=0, alpha=0.2, zorder=99,
)

# Both axes hold fractions in [0, 1]; render the major tick labels as percentages.
for axis in ( ax.xaxis, ax.yaxis ):
    axis.set_major_formatter( mticker.PercentFormatter( xmax=1 ) )

ax.set( xlim=(0, 1.01), ylim=(0, 1.01) )

# Minor ticks every 0.05 on both axes.
minor_ticks = np.arange( 0, 1, 0.05 )
for axis in ( ax.xaxis, ax.yaxis ):
    axis.set_ticks( minor_ticks, minor=True )

ax.set_xlabel( "Variants masked", fontweight="bold" )
ax.set_ylabel( "Accuracy", fontweight="bold" )

ax.grid( which="both", color="#EFEFEF" )

fig.tight_layout()
fig.savefig( snakemake.output.accuracy_plot )
plt.show()

In [None]:
# Descriptive statistics of "parsimony_placements" for each "fraction_missing"
# value, including the 2.5%, 50% and 97.5% percentiles.
placement_percentiles = [0.025, 0.5, 0.975]
pp = (
    res.groupby( "fraction_missing" )["parsimony_placements"]
    .describe( percentiles=placement_percentiles )
    .reset_index()
)
pp.head()

In [None]:
fig, ax = plt.subplots( dpi=200, figsize=(5,4) )

# Median placement count as a line, with the 2.5%-97.5% percentile band shaded beneath it.
ax.plot( "fraction_missing", "50%", data=pp, color="black", zorder=100 )
ax.fill_between( "fraction_missing", "2.5%", "97.5%", data=pp, color="black", linewidth=0, alpha=0.2, zorder=99)

# Only the x axis holds a fraction; the y axis is a plain count and keeps the default formatter.
ax.xaxis.set_major_formatter( mticker.PercentFormatter( 1 ) )

ax.set_xlim( 0, 1.01 )
# Fixed y range: anything above 10 falls outside the visible area.
ax.set_ylim( 0, 10 )

ax.set_xticks( np.arange( 0, 1, 0.05 ), minor=True )
ax.set_yticks( np.arange( 0, 10, 1 ), minor=True )
ax.set_yticks( np.arange( 0, 15, 5 ), minor=False )

ax.set_xlabel( "Variants masked (%)", fontweight="bold" )
ax.set_ylabel( "Parsimonious placements", fontweight="bold" )

ax.grid( which="both", color="#EFEFEF")

plt.tight_layout()
plt.savefig( snakemake.output.parsimony_plot )
plt.show()