In [None]:
from matplotlib import pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd
from scipy import stats
import seaborn as sns
import os
from contextlib import redirect_stderr

In [None]:
# Reset to matplotlib's defaults FIRST. plt.style.use('default') overwrites
# rcParams, so calling it after the customisations below would discard them.
plt.style.use('default')

# Global typography, default figure size and line widths for all figures.
mpl.rcParams['font.family']       = 'Helvetica'
mpl.rcParams['font.sans-serif']   = ["Helvetica","Arial","DejaVu Sans","Lucida Grande","Verdana"]
mpl.rcParams['figure.figsize']    = [4,3]
mpl.rcParams['font.size']         = 9
mpl.rcParams["axes.labelcolor"]   = "#000000"
mpl.rcParams["axes.linewidth"]    = 1.0
mpl.rcParams["xtick.major.width"] = 1.0
mpl.rcParams["ytick.major.width"] = 1.0

# General-purpose colour lists taken from the tab10 (10 colours) and
# Set3 (12 colours) colormaps.
cmap1 = plt.cm.tab10
cmap2 = plt.cm.Set3
colors1 = [cmap1(i) for i in range(0,10)]
colors2 = [cmap2(i) for i in range(0,12)]

# One colour per compared method; passed as `color=` to the bar plots.
colors_original_vs_FRACTAL=['#FFD479', '#73CBD6', '#7F33FF']

In [None]:
# Work inside the project data directory so the relative "figures/..." output
# paths used when saving plots resolve there.
os.chdir("/Users/nk/Documents/backupped/Research/YachieLabLocal/FRACTAL/data/NK_0144")

# Create the figure output directory. exist_ok=True makes re-running the cell
# harmless, while real failures (e.g. permission errors) are reported instead
# of being silently swallowed by a bare `except`.
os.makedirs("figures", exist_ok=True)

In [None]:
def cilen(arr, alpha=0.95):
    """Return the half-width of the Student-t confidence interval of the mean.

    Parameters
    ----------
    arr : sequence of numbers
        Sample values.
    alpha : float, optional
        Confidence level passed to ``stats.t.interval`` (default 0.95).

    Returns
    -------
    number
        Upper interval bound minus the interval midpoint, i.e. the length of
        one error-bar arm. ``0`` when ``arr`` has fewer than two elements.
    """
    n_obs = len(arr)
    # A confidence interval is undefined for fewer than two observations.
    if n_obs <= 1:
        return 0

    centre = np.mean(arr)
    std_err = stats.sem(arr)
    bounds = np.asarray(
        stats.t.interval(alpha, n_obs - 1, loc=centre, scale=std_err)
    )
    return bounds.max() - bounds.mean()

In [None]:
# Load the accuracy results (the CSV has no header row, so column names are
# supplied here), turn 'Error' entries into NaN, and derive the metrics:
#   Coverage = Ntips / Nseq
#   Accuracy = 1 - NRFD
#   Recovery = Coverage * Accuracy * 100
df_accuracy = (
    pd.read_csv(
        "/Users/nk/Documents/backupped/Research/YachieLabLocal/FRACTAL/data/NK_0144/result/accuracy.all.csv",
        names=['method', 'TASK_ID', 'Nseq', 'rep', 'treemethod', 'threshold', 'Ntips','NRFD'],
    )
    .replace('Error', np.nan)
    .assign(
        Coverage=lambda d: d['Ntips'].astype('float') / d['Nseq'],
        Accuracy=lambda d: (1 - d['NRFD'].astype(float)),
        Recovery=lambda d: d['Coverage'] * d['Accuracy']*100,
    )
)

# Successive stable sorts from the least to the most significant key; the
# final row order is by method, then Nseq, then rep, then treemethod.
for sort_key in ('treemethod', 'rep', 'Nseq', 'method'):
    df_accuracy = df_accuracy.sort_values(sort_key, kind='mergesort')
df_accuracy

In [None]:
# Load the run time / memory measurements.
df_runtime_memory = pd.read_csv(
    "/Users/nk/Documents/backupped/Research/YachieLabLocal/FRACTAL/data/NK_0144/result/runtime_memory.all.manual.csv"
)

# Successive stable sorts from the least to the most significant key; the
# final row order is by method, then Nseq, then rep, then treemethod.
for sort_key in ('treemethod', 'rep', 'Nseq', 'method'):
    df_runtime_memory = df_runtime_memory.sort_values(sort_key, kind='mergesort')
df_runtime_memory

In [None]:
# Sanity check: both tables list the same Nseq values in the same row order.
df_accuracy['Nseq'].tolist() == df_runtime_memory['Nseq'].tolist()

In [None]:
# Sanity check: both tables list the same replicate ids in the same row order.
df_accuracy['rep'].tolist() == df_runtime_memory['rep'].tolist()

In [None]:
# Sanity check: both tables list the same methods in the same row order.
df_accuracy['method'].tolist() == df_runtime_memory['method'].tolist()

In [None]:
# Combine the run time / memory measurements with the accuracy metrics.
# The columns are attached by ROW POSITION (not by key), so the two tables
# must be in the same order. Fail loudly here rather than silently pairing
# measurements from different runs.
for key in ('method', 'Nseq', 'rep'):
    assert list(df_accuracy[key]) == list(df_runtime_memory[key]), \
        "df_accuracy and df_runtime_memory are not row-aligned on column: " + key

# Copy so that df_runtime_memory itself is not modified by the new columns.
df_all = df_runtime_memory.copy()
df_all['Recovery'] = list(df_accuracy['Recovery'])
df_all['Accuracy'] = list(df_accuracy['Accuracy']*100)  # fraction -> percent
df_all['Ntips'] = list(df_accuracy['Ntips'])
df_all

In [None]:
# Draw one bar chart (group mean with error bars from `cilen`) overlaid with the
# individual data points, for every metric x tree-method combination, and save
# each one as figures/NK_0144_<metric>_<treemethod>.pdf.

# Per-metric bar settings: only the bar width, y-axis range and y-axis scale
# differ between metrics; every other style option is shared.
# NOTE: "Coverage" has settings here but is not in the metric list of the loop
# below; "Recovery" ("Recovery ratio (%)") was tried previously and has no bar
# settings.
bar_settings = {
    "RunTime":      {"width": 0.85, "ylim": [1,1000000],      "logy": True},
    "SingleMemory": {"width": 0.80, "ylim": [10000,40000000], "logy": True},
    "Accuracy":     {"width": 0.85, "ylim": [99,100.05]},
    "Coverage":     {"width": 0.85, "ylim": [0,1.05]},
}

# Column / hue order of the compared methods (matches colors_original_vs_FRACTAL).
methods = ["original", "fractal1node", "fractal100nodes"]

for value, ylabel in [("Accuracy", "Accuracy (%)"), ("RunTime", 'Run time (sec)'), ("SingleMemory",'Max memory usage\nper node (KB)')]:
    for treemethod, title in [("rapidnjNJ", "NJ (RapidNJ)"), ("raxmlMP", "MP (RAxML)"), ("fasttreeML","ML (FastTree)")]:

        # Keep only the runs of this tree method that have a tip count.
        df_treemethod = df_all[
            (df_all['treemethod']==treemethod) &
            (~df_all['Ntips'].isnull())
            ]

        # Make pivot tables: m = mean, e = error-bar length (see cilen).
        # Because index='Ntips', only conditions where lineage estimation
        # succeeded are picked up.
        # stderr is sent to the null device while aggregating (presumably to
        # silence warnings raised inside the aggregation); the `with` block
        # closes the null-device handle again instead of leaking it.
        with open(os.devnull, 'w') as devnull, redirect_stderr(devnull):
            m = df_treemethod.pivot_table(index='Ntips', columns='method', values=value, aggfunc='mean')
            e = df_treemethod.pivot_table(index='Ntips', columns='method', values=value, aggfunc=cilen)

        # Visualize
        fig = plt.figure(figsize=(1.9,2.5))
        ax = fig.add_axes([0.1,0.1,0.8,0.8])

        m1 = m.loc[:,methods]
        # Metrics without bar settings get no bars (only the points below).
        if value in bar_settings:
            m1.plot(
                ax=ax,
                kind='bar',
                legend=False,
                yerr=e,
                capsize=1,
                error_kw={"elinewidth":0.4, "capthick":0.4},
                label='',
                color=colors_original_vs_FRACTAL,
                linewidth=0.4,
                edgecolor='black',
                **bar_settings[value]
            )

        # Overlay the individual measurements on the same axes.
        sns.stripplot(data = df_treemethod, x = "Nseq", y = value, hue = "method", hue_order=methods, dodge=True, size = 1, color = "#AAAAAA", jitter = 0.3, ax = ax)
        legend = ax.get_legend()
        if legend is not None:
            legend.remove()

        ax.set_title(title,pad=10,fontsize=10)
        ax.set_xlabel('')
        ax.set_xticklabels(['1,024','16,384','262,144','4,194,304'])
        ax.set_ylabel(ylabel,fontsize=10)
        fig.savefig("figures/NK_0144_"+ value +"_" +treemethod +".pdf", bbox_inches='tight')
        #plt.show()
        # Close explicitly: many figures are created in this loop.
        plt.close(fig)

In [None]:
# Run time of FRACTAL on 1 node vs 100 nodes: for each tree method and input
# size, pair the runs by replicate ('rep'), run a paired t-test, and report
# the ratio of the mean run times (1 node / 100 nodes).
target = "RunTime"
for treemethod in ["rapidnjNJ", "raxmlMP", "fasttreeML"]:
    df_treemethod = df_all[df_all['treemethod']==treemethod]

    for Nseq in [16384, 262144, 4194304]:
        is_this_size = (df_treemethod["Nseq"]==Nseq)
        df_fractal_single   = df_treemethod[is_this_size & (df_treemethod["method"] == "fractal1node")]
        df_fractal_parallel = df_treemethod[is_this_size & (df_treemethod["method"] == "fractal100nodes")]

        # After the merge, "_x" columns come from the 1-node runs and "_y"
        # columns from the 100-node runs.
        df_merge = pd.merge(df_fractal_single, df_fractal_parallel, on = 'rep')
        single_values   = df_merge[target+"_x"]
        parallel_values = df_merge[target+"_y"]

        test_result = stats.ttest_rel(single_values, parallel_values)
        single_to_parallel_ratio = np.mean(single_values) / np.mean(parallel_values)

        print(treemethod, target, Nseq, "1 node / 100 nodes", single_to_parallel_ratio, "times", "p=", test_result.pvalue, sep = ",")

In [None]:
# Memory usage of the original tool vs FRACTAL on 1 node: for each tree method
# and input size, pair the runs by replicate ('rep'), run a paired t-test, and
# report the ratio of the mean memory usage (fractal / original).
target = "SingleMemory"
for treemethod in ["rapidnjNJ", "raxmlMP", "fasttreeML"]:
    df_treemethod = df_all[df_all['treemethod']==treemethod]

    for Nseq in [16384, 262144, 4194304]:
        is_this_size = (df_treemethod["Nseq"]==Nseq)
        df_original       = df_treemethod[is_this_size & (df_treemethod["method"] == "original")]
        df_fractal_single = df_treemethod[is_this_size & (df_treemethod["method"] == "fractal1node")]

        # After the merge, "_x" columns come from the original runs and "_y"
        # columns from the FRACTAL 1-node runs.
        df_merge = pd.merge(df_original, df_fractal_single, on = 'rep')
        original_values = df_merge[target+"_x"]
        fractal_values  = df_merge[target+"_y"]

        test_result = stats.ttest_rel(original_values, fractal_values)
        fractal_to_original_ratio = np.mean(fractal_values) / np.mean(original_values)

        print(treemethod, target, Nseq, "fractal / original", fractal_to_original_ratio, "times", "p=", test_result.pvalue, sep = ",")