In [6]:
import pyabc
import scipy
import numpy as np
from pyabc.visualization import plot_kde_matrix_highlevel,plot_kde_matrix2
from datetime import datetime
import pandas as pd
from arviz import hdi
from pyabc.visualization import plot_kde_matrix_highlevel, plot_kde_matrix
import numpy as np
import matplotlib.pyplot as plt

path = '../../results'
output_path = '../../figures'

f_no_aneuploidy = 'no-aneuploidy'
f_with_aneuploidy = 'basic-prior'
f_fixedm5 = 'fixedmr_5' 
f_fixedm6 = 'fixedmr_6'
f_fixedm7 = 'fixedmr_7'
f_fixedm36 = 'fixedmr_36' #3*10^-6
f_prior4 = 'extended-prior' # 0.370941 epsilon #bug it is not 1000 reps, it is 100, rerun
f_neutral = 'neutral-aneuploidy'
# f_wider_range = '2022-02-13-wider-mutation-rate-r100'

In [7]:
k1 = 'basic-prior'
k2 = 'tau2' 
k103125 = 'tau3332'
k5 = 'tau5' 
k10 = 'tau10'
k100 = 'tau100'
allf = [k1, k103125, k2, k5, k10, k100]

In [8]:
print('termination times:')
for f in [f_with_aneuploidy, f_no_aneuploidy, f_prior4, f_neutral, f_fixedm5, f_fixedm6, f_fixedm7, f_fixedm36]+allf:
    history = pyabc.History("sqlite:///{0}/{1}/{1}.db".format(path, f))
    print(f, round(history.get_all_populations()['epsilon'].values[-1],5))

termination times:
basic-prior 0.12659
no-aneuploidy 0.26716
extended-prior 0.37094
neutral-aneuploidy 0.18438
fixedmr_5 0.12735
fixedmr_6 0.12957
fixedmr_7 0.12996
fixedmr_36 0.12788
basic-prior 0.12659
tau3332 0.12827
tau2 0.12688
tau5 0.1277
tau10 0.12719
tau100 0.13082


In [65]:
class W_RV(pyabc.RVBase):
    def __init__(self):
        l = np.load('../../data/evo39_fitness_39deg.npz')
        self.kde = scipy.stats.gaussian_kde(l['arr_0'])
    def rvs(self, *args, **kwargs):
        return self.kde.resample(1)[0][0]
    def pdf(self, x, *args, **kwargs):
        return self.kde.pdf(x)[0]
    def copy(self):
        raise NotImplementedError('copy')
    def pmf(self, x, *args, **kwargs):
        raise NotImplementedError('pmf')
    def cdf(self, x, *args, **kwargs):
        raise NotImplementedError('cdf')
        
class D_RV(pyabc.RVBase):
    def __init__(self):
        l = np.load('../../data/refined_vs_evo39_fitness_39deg.npz')
        self.kde = scipy.stats.gaussian_kde(l['arr_0'])
    def rvs(self, *args, **kwargs):
        return self.kde.resample(1)[0][0]
    def pdf(self, x, *args, **kwargs):
        return self.kde.pdf(x)[0]
    def copy(self):
        raise NotImplementedError('copy')
    def pmf(self, x, *args, **kwargs):
        raise NotImplementedError('pmf')
    def cdf(self, x, *args, **kwargs):
        raise NotImplementedError('cdf')
        
prior = pyabc.Distribution(p1_mr=pyabc.RV("uniform", 10.0**-9, 10.0**-5-10.0**-9)
                            ,p2_tr=pyabc.RV("uniform", 10.0**-6, 10.0**-2-10.0**-6)
                            ,p3_w1=W_RV()
                            ,p4_w2=W_RV()
                            ,p5_w3=W_RV())

prior_alt = pyabc.Distribution(p1_mr=pyabc.RV("uniform", 10.0**-9, 10.0**-5-10.0**-9)
                            ,p2_tr=pyabc.RV("uniform", 10.0**-6, 10.0**-2-10.0**-6)
                            ,p3_w1=W_RV()
                            ,p4_w2=D_RV()
                            ,p5_w3=D_RV())

prior_fixed_mr = pyabc.Distribution(p2_tr=pyabc.RV("uniform", 10.0**-6, 10.0**-2-10.0**-6)
                            ,p3_w1=W_RV()
                            ,p4_w2=W_RV()
                            ,p5_w3=W_RV())

prior_no_aneuploidy = pyabc.Distribution(p1_mr=pyabc.RV("uniform", 10.0**-9, 10.0**-5-10.0**-9)
                        ,p5_w3=W_RV())

def sample_from_kde(file_name, t=None, alt_prior=False, size=50000):
    history = pyabc.History("sqlite:///{0}/{1}/{1}.db".format(path, file_name))
    samples, weights = history.get_distribution(m=0, t=t)
    kde = scipy.stats.gaussian_kde(samples.T.values.tolist(),weights=weights)
    kdesamples = kde.resample(2*size).T
    if alt_prior:
        kdesamples = pd.DataFrame(data=kdesamples,columns=['mutation rate', 'trisomy rate', '2n+1 fitness', '2n+1* fitness', '2n* fitness'])
        criteria = (kdesamples['mutation rate']>1e-9) & (kdesamples['mutation rate']<1e-5) & (kdesamples['trisomy rate']>1e-6) & (kdesamples['mutation rate']<1e-2)
        w1 = samples['p3_w1']
        w2 = samples['p3_w1']*samples['p4_w2']
        w3 = samples['p3_w1']*samples['p5_w3']
        order_criteria = (1<w1) & (w1<w2) & (w2<w3)
        criteria = criteria & order_criteria
        kdesamples = kdesamples[criteria][:size]
    elif len(samples.columns)==2:
        kdesamples = pd.DataFrame(data=kdesamples,columns=['mutation rate', '2n* fitness'])
        criteria = (kdesamples['mutation rate']>1e-9) & (kdesamples['mutation rate']<1e-5)
        kdesamples = kdesamples[criteria][:size]
    elif len(samples.columns)==4:
        kdesamples = pd.DataFrame(data=kdesamples,columns=['trisomy rate', '2n+1 fitness', '2n+1* fitness', '2n* fitness'])
        criteria = (kdesamples['trisomy rate']>1e-6) & (kdesamples['trisomy rate']<1e-2)
        kdesamples = kdesamples[criteria][:size]
    else:
        kdesamples = pd.DataFrame(data=kdesamples,columns=['mutation rate', 'trisomy rate', '2n+1 fitness', '2n+1* fitness', '2n* fitness'])
        criteria = (kdesamples['mutation rate']>1e-9) & (kdesamples['mutation rate']<1e-5) & (kdesamples['trisomy rate']>1e-6) & (kdesamples['mutation rate']<1e-2)
        order_criteria = (kdesamples['2n+1 fitness'] < kdesamples['2n+1* fitness']) & (kdesamples['2n+1* fitness'] < kdesamples['2n* fitness'])
        criteria = criteria & order_criteria
        kdesamples = kdesamples[criteria][:size]
    return kdesamples, kde

def WAIC_for(f, fixed_mut_rate=False, no_aneuploidy=False, alt_prior=False):
    if alt_prior:
        samples, kde = sample_from_kde(f, t=18, alt_prior=True)
    else:
        samples, kde = sample_from_kde(f)
    samples = samples.T.values
    posteriors = kde.pdf(samples)
    if fixed_mut_rate:
        priors = [prior_fixed_mr.pdf(dict(zip(['p2_tr','p3_w1','p4_w2','p5_w3'],s)))+1e-6 for s in samples.T]
    elif no_aneuploidy:
        priors = [prior_no_aneuploidy.pdf(dict(zip(['p1_mr','p5_w3'],s)))+1e-6 for s in samples.T]
    elif alt_prior:
        priors = [prior_alt.pdf(dict(zip(['p1_mr','p2_tr','p3_w1','p4_w2','p5_w3'],s)))+1e-6 for s in samples.T]
    else:
        priors = [prior.pdf(dict(zip(['p1_mr','p2_tr','p3_w1','p4_w2','p5_w3'],s)))+1e-6 for s in samples.T]
    likelihoods = posteriors/priors 
    return round(WAIC_with_weights(likelihoods))

In [66]:
results = []
results.append(dict(Model='Without aneuploidy', WAIC=WAIC_for(f_no_aneuploidy, fixed_mut_rate=False, no_aneuploidy=True)))
results.append(dict(Model='Fixed mutation rate, $\mu=10^{{-{5}}}$, $\\tau=1$', WAIC=WAIC_for(f_fixedm5, fixed_mut_rate=True)))
results.append(dict(Model='Fixed mutation rate, $\mu=10^{{-{6}}}$, $\\tau=1$', WAIC=WAIC_for(f_fixedm6, fixed_mut_rate=True)))
results.append(dict(Model='Fixed mutation rate, $\mu=10^{{-{7}}}$, $\\tau=1$', WAIC=WAIC_for(f_fixedm7, fixed_mut_rate=True)))
labels = ['Free mutation rate, $\\tau=1$','Free mutation rate, $\\tau=33/32$',
          'Free mutation rate, $\\tau=2$','Free mutation rate, $\\tau=5$','Free mutation rate, $\\tau=10$',
          'Free mutation rate, $\\tau=100$']
for l, f in zip(labels, allf):
    waic = WAIC_for(f)
    results.append(dict(Model=l, WAIC=waic))
results

[{'Model': 'Without aneuploidy', 'WAIC': -35},
 {'Model': 'Fixed mutation rate, $\\mu=10^{{-{5}}}$, $\\tau=1$', 'WAIC': -16},
 {'Model': 'Fixed mutation rate, $\\mu=10^{{-{6}}}$, $\\tau=1$', 'WAIC': -11},
 {'Model': 'Fixed mutation rate, $\\mu=10^{{-{7}}}$, $\\tau=1$', 'WAIC': -14},
 {'Model': 'Free mutation rate, $\\tau=1$', 'WAIC': -9},
 {'Model': 'Free mutation rate, $\\tau=33/32$', 'WAIC': -9},
 {'Model': 'Free mutation rate, $\\tau=2$', 'WAIC': -8},
 {'Model': 'Free mutation rate, $\\tau=5$', 'WAIC': -12},
 {'Model': 'Free mutation rate, $\\tau=10$', 'WAIC': -9},
 {'Model': 'Free mutation rate, $\\tau=100$', 'WAIC': -12}]

In [67]:
#running again, simillar results
results = []
results.append(dict(Model='Without aneuploidy', WAIC=WAIC_for(f_no_aneuploidy, fixed_mut_rate=False, no_aneuploidy=True)))
results.append(dict(Model='Fixed mutation rate, $\mu=10^{{-{5}}}$, $\\tau=1$', WAIC=WAIC_for(f_fixedm5, fixed_mut_rate=True)))
results.append(dict(Model='Fixed mutation rate, $\mu=10^{{-{6}}}$, $\\tau=1$', WAIC=WAIC_for(f_fixedm6, fixed_mut_rate=True)))
results.append(dict(Model='Fixed mutation rate, $\mu=10^{{-{7}}}$, $\\tau=1$', WAIC=WAIC_for(f_fixedm7, fixed_mut_rate=True)))
labels = ['Free mutation rate, $\\tau=1$','Free mutation rate, $\\tau=33/32$',
          'Free mutation rate, $\\tau=2$','Free mutation rate, $\\tau=5$','Free mutation rate, $\\tau=10$',
          'Free mutation rate, $\\tau=100$']
results.append(dict(Model='With aneuploidy, extended prior', WAIC=WAIC_for(f_prior4, alt_prior=True)))
for l, f in zip(labels, allf):
    waic = WAIC_for(f)
    results.append(dict(Model=l, WAIC=waic))
results

[{'Model': 'Without aneuploidy', 'WAIC': -35},
 {'Model': 'Fixed mutation rate, $\\mu=10^{{-{5}}}$, $\\tau=1$', 'WAIC': -16},
 {'Model': 'Fixed mutation rate, $\\mu=10^{{-{6}}}$, $\\tau=1$', 'WAIC': -11},
 {'Model': 'Fixed mutation rate, $\\mu=10^{{-{7}}}$, $\\tau=1$', 'WAIC': -14},
 {'Model': 'With aneuploidy, extended prior', 'WAIC': -30},
 {'Model': 'Free mutation rate, $\\tau=1$', 'WAIC': -9},
 {'Model': 'Free mutation rate, $\\tau=33/32$', 'WAIC': -9},
 {'Model': 'Free mutation rate, $\\tau=2$', 'WAIC': -8},
 {'Model': 'Free mutation rate, $\\tau=5$', 'WAIC': -12},
 {'Model': 'Free mutation rate, $\\tau=10$', 'WAIC': -9},
 {'Model': 'Free mutation rate, $\\tau=100$', 'WAIC': -12}]

In [57]:
WAIC_for(f_fixedm36, fixed_mut_rate=True) #3.6*1e-6

-12

In [63]:
WAIC_for(f_prior4, alt_prior=True)

-28

In [68]:
df = pd.DataFrame(results)
df.index = np.arange(1, len(df)+1)
df.index.rename('id', inplace=True)
df = df.reset_index().rename({'index':'index1'}, axis = 'columns')
# df.to_csv(f'{output_path}/Table_WAIC.csv', index=False, float_format="%.2f", sep=';')
df

Unnamed: 0,id,Model,WAIC
0,1,Without aneuploidy,-35
1,2,"Fixed mutation rate, $\mu=10^{{-{5}}}$, $\tau=1$",-16
2,3,"Fixed mutation rate, $\mu=10^{{-{6}}}$, $\tau=1$",-11
3,4,"Fixed mutation rate, $\mu=10^{{-{7}}}$, $\tau=1$",-14
4,5,"With aneuploidy, extended prior",-30
5,6,"Free mutation rate, $\tau=1$",-9
6,7,"Free mutation rate, $\tau=33/32$",-9
7,8,"Free mutation rate, $\tau=2$",-8
8,9,"Free mutation rate, $\tau=5$",-12
9,10,"Free mutation rate, $\tau=10$",-9
