In [None]:
from Bio import Phylo
from matplotlib import pyplot as plt
import matplotlib as mpl
import numpy as np
import os
import pandas as pd
import seaborn as sns
import random
import copy

In [None]:
# Global matplotlib styling shared by every figure in this notebook.
#
# plt.style.use('default') resets the rcParams to matplotlib's built-in defaults,
# so it must run BEFORE the overrides below. When it ran after them, the font,
# size and line-width settings were silently discarded.
plt.style.use('default')

mpl.rcParams['font.family']       = 'Helvetica'
mpl.rcParams['font.sans-serif']   = ["Helvetica","Arial","DejaVu Sans","Lucida Grande","Verdana"]
mpl.rcParams['figure.figsize']    = [4,3]
mpl.rcParams['font.size']         = 9
mpl.rcParams["axes.labelcolor"]   = "#000000"
mpl.rcParams["axes.linewidth"]    = 1.0
mpl.rcParams["xtick.major.width"] = 1.0
mpl.rcParams["ytick.major.width"] = 1.0

# Qualitative palettes expanded into plain lists of RGBA tuples
# (the first 10 colours of tab10 and the first 12 of Set3).
cmap1 = plt.cm.tab10
cmap2 = plt.cm.Set3
colors1 = [cmap1(i) for i in range(0,10)]
colors2 = [cmap2(i) for i in range(0,12)]

In [None]:
# Work from the experiment directory so the relative "figures/..." paths used by the
# plotting cells resolve there.
# NOTE(review): this is a machine-specific absolute path; the notebook will not run
# elsewhere without editing it.
os.chdir("/Users/nk/Documents/backupped/Research/YachieLabLocal/FRACTAL/data/NK_0146/")

# exist_ok=True makes re-running the cell harmless when the directories already exist,
# while still surfacing real failures (e.g. permission errors) that the previous bare
# `except:` silently swallowed.
for output_dir in ('figures', 'table'):
    os.makedirs(output_dir, exist_ok=True)

In [None]:
# Read the Cassiopeia result CSV with explicit column names and display the frame.
df = pd.read_csv(
    "/Users/nk/Documents/backupped/Research/YachieLabLocal/FRACTAL/data/NK_0146/result/result.cassiopeia.csv",
    names = [
        "ID", "one_minus_mut_rate", "rep", "method",
        "Memory", "Mem_unit", "Time", "Time_unit",
        "Ntips", "NRFD", "Triplet",
    ],
)
df

In [None]:
# For each Cassiopeia method, save two scatter plots against mutation rate:
# "Accuracy" (computed here as 1 - NRFD) and the "Triplet" column, both shown in percent.

# The CSV does not depend on the method, so read it once instead of once per iteration.
result_columns = ["ID", "one_minus_mut_rate", "rep", "method", "Memory", "Mem_unit", "Time", "Time_unit", "Ntips", "NRFD", "Triplet"]
cassiopeia_results = pd.read_csv("/Users/nk/Documents/backupped/Research/YachieLabLocal/FRACTAL/data/NK_0146/result/result.cassiopeia.csv", names = result_columns)

# One entry per figure: (column to plot, y-axis label, marker colour, output-file suffix).
panels = [
    ("Accuracy", "Accuracy (%)",         "#F3A83B", "accuracy"),
    ("Triplet",  "Triplets correct (%)", "#3BA8F3", "triplets"),
]

for method in ["greedy", "ilp", "hybrid"]:
    # .copy() yields an independent frame, so the column assignments below do not
    # write into a filtered slice (which triggers pandas' SettingWithCopyWarning).
    df = cassiopeia_results[cassiopeia_results["method"] == method].copy()

    # "one_minus_mut_rate" holds the digits that follow "0." (e.g. 95 -> 0.95 -> mut_rate 0.05).
    # NOTE(review): a value written with a leading zero would lose it if read_csv parses
    # the column as integers — confirm the CSV never contains one.
    df["mut_rate"] = [1 - float("0." + str(one_minus_mut_rate)) for one_minus_mut_rate in df["one_minus_mut_rate"]]
    df["Accuracy"] = 1 - df["NRFD"]

    for column, ylabel, color, suffix in panels:
        fig = plt.figure(figsize=(2.2, 2.2))
        ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
        ax.scatter(df["mut_rate"], df[column] * 100, color = color, alpha = 1, s = 5)
        ax.set_xlabel("Mutation rate")
        ax.set_ylabel(ylabel)
        ax.set_xlim(0, 0.55)
        ax.set_ylim(0, 105)
        ax.set_title(method)
        fig.savefig("figures/NK_0146_cassiopeia_" + method + "_" + suffix + ".pdf", bbox_inches = 'tight')
        # Close explicitly so figures do not accumulate in memory across iterations.
        plt.close(fig)

In [None]:
# Read the FRACTAL + Cassiopeia result CSV with explicit column names and display the frame.
df = pd.read_csv(
    "/Users/nk/Documents/backupped/Research/YachieLabLocal/FRACTAL/data/NK_0146/result/result.fractal_cassiopeia.csv",
    names = [
        "ID", "one_minus_mut_rate", "rep", "method",
        "Memory", "Mem_unit", "Time", "Time_unit",
        "Ntips", "NRFD", "Triplet",
    ],
)
df

In [None]:
# For each method in the FRACTAL + Cassiopeia results, save three scatter plots against
# mutation rate: "Accuracy" (1 - NRFD), the "Triplet" column, and "Coverage" (Ntips / 400),
# all shown in percent.

# The CSV does not depend on the method, so read it once instead of once per iteration.
result_columns = ["ID", "one_minus_mut_rate", "rep", "method", "Memory", "Mem_unit", "Time", "Time_unit", "Ntips", "NRFD", "Triplet"]
fractal_results = pd.read_csv("/Users/nk/Documents/backupped/Research/YachieLabLocal/FRACTAL/data/NK_0146/result/result.fractal_cassiopeia.csv", names = result_columns)

# Denominator of the coverage ratio.
# NOTE(review): presumably the number of tips in the full input tree — confirm.
total_tips = 400

# One entry per figure: (column to plot, y-axis label, marker colour, output-file suffix).
panels = [
    ("Accuracy", "Accuracy (%)",         "#F3A83B", "accuracy"),
    ("Triplet",  "Triplets correct (%)", "#3BA8F3", "triplets"),
    ("Coverage", "Coverage (%)",         "green",   "coverage"),
]

for software, method in [("cassiopeia", "greedy"), ("cassiopeia", "hybrid"), ("cassiopeia", "ilp")]:
    # .copy() yields an independent frame, so the column assignments below do not
    # write into a filtered slice (which triggers pandas' SettingWithCopyWarning).
    df = fractal_results[fractal_results["method"] == method].copy()

    # "one_minus_mut_rate" holds the digits that follow "0." (e.g. 95 -> 0.95 -> mut_rate 0.05).
    # NOTE(review): a value written with a leading zero would lose it if read_csv parses
    # the column as integers — confirm the CSV never contains one.
    df["mut_rate"] = [1 - float("0." + str(one_minus_mut_rate)) for one_minus_mut_rate in df["one_minus_mut_rate"]]
    df["Accuracy"] = 1 - df["NRFD"]
    df["Coverage"] = df["Ntips"] / total_tips

    for column, ylabel, color, suffix in panels:
        fig = plt.figure(figsize=(2.2, 2.2))
        ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
        ax.scatter(df["mut_rate"], df[column] * 100, color = color, alpha = 1, s = 5)
        ax.set_xlabel("Mutation rate")
        ax.set_ylabel(ylabel)
        ax.set_xlim(0, 0.55)
        ax.set_ylim(0, 105)
        ax.set_title(method)
        fig.savefig("figures/NK_0146_fractal_" + software + "_" + method + "_" + suffix + ".pdf", bbox_inches = 'tight')
        # Close explicitly so figures do not accumulate in memory across iterations.
        plt.close(fig)

In [None]:
# Disabled cell: the code below is wrapped in a triple-quoted string, so none of it runs;
# the cell only evaluates to that string. If re-enabled, it would load the
# "vary time limit" result CSV into df_vary_time and derive mut_rate, Accuracy (1 - NRFD),
# Coverage (Ntips / 400) and a string copy of Optimization_time.
'''
df_vary_time = pd.read_csv("/Users/nk/Documents/backupped/Research/YachieLabLocal/FRACTAL/data/NK_0146/result/fractal_cassiopeia_vary_time_limit/result.fractal_cassiopeia_vary_time_limit.manual.csv", names = ["ID", "Optimization_time", "one_minus_mut_rate", "rep", "method", "Memory_dummy", "Mem_unit_dummy", "Time_dummy", "Time_unit_dummy", "Memory", "Mem_unit", "Time", "Time_unit", "Ntips", "NRFD", "Triplet"])
df_vary_time ["mut_rate"] = [1 - float("0."+str(one_minus_mut_rate)) for one_minus_mut_rate in df_vary_time["one_minus_mut_rate"]]
df_vary_time ["Accuracy"] = ( 1 - df_vary_time["NRFD"] )
df_vary_time ["Coverage"] = ( df_vary_time["Ntips"]/400 )
df_vary_time ["Optimization_time_str"] = [str(time) for time in df_vary_time ["Optimization_time"]]
df_vary_time
'''

In [None]:
# Disabled cell: the code below is wrapped in a triple-quoted string, so none of it runs.
# If re-enabled, it would select the df_vary_time rows for method "ilp", rep 0 and
# Optimization_time > 1000; df_vary_time is only defined inside another disabled cell.
'''
software = "cassiopeia"
method = "ilp"
rep    = 0
#Optimization_time = 12600
#df_vary_time_ext = df_vary_time[(df_vary_time["method"]==method) & (df_vary_time_ext["Optimization_time"]==Optimization_time)]
df_vary_time_ext = df_vary_time[(df_vary_time["method"]==method) & (df_vary_time["rep"]==rep) &(df_vary_time["Optimization_time"]>1000)]
df_vary_time_ext
'''

In [None]:
# Disabled cell: the code below is wrapped in a triple-quoted string, so none of it runs.
# If re-enabled, it would draw, for each (method, rep) pair, seaborn scatter plots of
# Accuracy, Triplet and Coverage against mut_rate, coloured by Optimization_time, and save
# them as PDFs. It needs df_vary_time, which is only defined inside another disabled cell.
# NOTE(review): the triplet panel is labelled "(%)" but plots the raw column on a 0-1 axis,
# and no figure is closed after saving.
'''
for software, method in [("cassiopeia", "hybrid"), ("cassiopeia", "ilp")]:
    for rep in [0,1]:
        #software = "cassiopeia"
        #method = "ilp"
        df_vary_time_ext = df_vary_time[(df_vary_time["method"]==method) & (df_vary_time["rep"]==rep)]
        #rep    = 0

        fig = plt.figure(figsize=(2.2,2.2))
        ax = fig.add_axes([0.1,0.1,0.8,0.8])
        sns.scatterplot(x = "mut_rate", y = "Accuracy", data = df_vary_time_ext,  hue = "Optimization_time", ax = ax)
        ax.set_xlim(0,0.5)
        ax.set_ylim(0,1)
        ax.get_legend().remove()
        #ax.legend(bbox_to_anchor=(1.05, 1),)
        ax.set_title(method+ " "+str(rep), fontsize=10)
        ax.set_xlabel("Mutation rate")
        ax.set_ylabel("Accuracy")
        plt.savefig("figures/NK_0146_fractal_"+software+"_"+method+"_rep"+str(rep)+"_accuracy.pdf", bbox_inches = 'tight')

        fig = plt.figure(figsize=(2.2,2.2))
        ax = fig.add_axes([0.1,0.1,0.8,0.8])
        sns.scatterplot(x = "mut_rate", y = "Triplet", data = df_vary_time_ext,  hue = "Optimization_time", ax = ax)
        ax.set_xlim(0,0.5)
        ax.set_ylim(0,1)
        ax.get_legend().remove()
        #ax.legend(bbox_to_anchor=(1.05, 1),)
        ax.set_title(method+ " "+str(rep), fontsize=10)
        ax.set_xlabel("Mutation rate")
        ax.set_ylabel("Triplets correct (%)")
        plt.savefig("figures/NK_0146_fractal_"+software+"_"+method+"_rep"+str(rep)+"_triplet.pdf", bbox_inches = 'tight')


        fig = plt.figure(figsize=(2.2,2.2))
        ax = fig.add_axes([0.1,0.1,0.8,0.8])
        sns.scatterplot(x = "mut_rate", y = "Coverage", data = df_vary_time_ext,  hue = "Optimization_time", ax = ax)
        ax.set_xlim(0,0.5)
        ax.set_ylim(0,1)
        ax.get_legend().remove()
        #ax.legend(bbox_to_anchor=(1.05, 1),)
        ax.set_title(method+ " "+str(rep), fontsize=10)
        ax.set_xlabel("Mutation rate")
        ax.set_ylabel("Coverage")
        plt.savefig("figures/NK_0146_fractal_"+software+"_"+method+"_rep"+str(rep)+"_coverage.pdf", bbox_inches = 'tight')
'''

In [None]:
# Compare the original Cassiopeia results with the FRACTAL + Cassiopeia results.
# Both CSVs share one layout, so they are loaded and augmented by the same loop:
# mut_rate from the digits after "0.", Accuracy = 1 - NRFD, Coverage = Ntips / 400.
loaded_frames = []
for csv_path in (
    "/Users/nk/Documents/backupped/Research/YachieLabLocal/FRACTAL/data/NK_0146/result/result.cassiopeia.csv",
    "/Users/nk/Documents/backupped/Research/YachieLabLocal/FRACTAL/data/NK_0146/result/result.fractal_cassiopeia.csv",
):
    frame = pd.read_csv(
        csv_path,
        names = ["ID", "one_minus_mut_rate", "rep", "method", "Memory", "Mem_unit", "Time", "Time_unit", "Ntips", "NRFD", "Triplet"],
    )
    frame["mut_rate"] = [1 - float("0." + str(digits)) for digits in frame["one_minus_mut_rate"]]
    frame["Accuracy"] = 1 - frame["NRFD"]
    frame["Coverage"] = frame["Ntips"] / 400
    loaded_frames.append(frame)
df_original, df_fractalized = loaded_frames

# One entry per saved figure:
# (column, y-axis label, file suffix, marker size of the FRACTAL points, overlay original points?)
figure_specs = (
    ("Accuracy", "Accuracy (%)",         "accuracy",       25, True),
    ("Triplet",  "Triplets correct (%)", "triplets",       25, True),
    ("Coverage", "Coverage (%)",         "coverage",       25, False),
    ("Coverage", "Coverage (%)",         "coverage_small", 10, False),
)

for software, method in [("cassiopeia", "greedy"), ("cassiopeia", "hybrid"), ("cassiopeia", "ilp")]:

    df_original_ext    = df_original[df_original["method"] == method]
    df_fractalized_ext = df_fractalized[df_fractalized["method"] == method]

    for column, y_label, suffix, fractal_size, overlay_original in figure_specs:
        fig = plt.figure(figsize=(2.2*1.618033988, 2.2))
        ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
        # FRACTAL points are drawn first so the smaller original points sit on top of them.
        ax.scatter(df_fractalized_ext["mut_rate"], df_fractalized_ext[column]*100, color = '#73CBD6', alpha = 1, s = fractal_size)
        if overlay_original:
            ax.scatter(df_original_ext["mut_rate"], df_original_ext[column]*100, color = '#FFD479', alpha = 1, s = 10)
        ax.set_xlabel("Mutation rate")
        ax.set_ylabel(y_label)
        ax.set_xlim(0, 0.55)
        ax.set_ylim(0, 105)
        ax.set_title(method)
        plt.savefig("figures/NK_0146_original_vs_fractal_" + software + "_" + method + "_" + suffix + ".pdf", bbox_inches = 'tight')
        plt.close()