In [35]:
import pyabc
import scipy
import numpy as np
from pyabc.visualization import plot_kde_matrix_highlevel,plot_kde_matrix2,plot_kde_matrix_for_paper
from datetime import datetime
import pandas as pd
from arviz import hdi
from pyabc.visualization import plot_kde_matrix_highlevel, plot_kde_matrix
import numpy as np
import matplotlib.pyplot as plt

path = '../../results'

f_no_aneuploidy = 'no-aneuploidy'
f_with_aneuploidy = 'basic-prior'
f_tau100 = 'tau100'
f_tau10 = 'tau10'

history = pyabc.History("sqlite:///{0}/{1}/{1}.db".format(path,f_no_aneuploidy))
history.get_all_populations()[-3:]

Unnamed: 0,t,population_end_time,samples,epsilon,particles
31,30,2022-01-06 20:40:28.751281,9066,0.275595,14
32,31,2022-01-08 21:03:27.959201,26766,0.272928,11
33,32,2022-01-12 13:03:32.838836,53034,0.267157,14


In [36]:
history = pyabc.History("sqlite:///{0}/{1}/{1}.db".format(path,f_with_aneuploidy))
history.get_all_populations()[-3:]

Unnamed: 0,t,population_end_time,samples,epsilon,particles
4,3,2021-12-29 16:37:20.673694,16125,0.145762,1111
5,4,2021-12-29 21:50:47.240188,27629,0.131083,964
6,5,2021-12-30 08:00:52.829615,53803,0.126588,982


In [62]:
def sample_from_kde(file_name, size=5_000_000, t=None):
    history = pyabc.History("sqlite:///{0}/{1}/{1}.db".format(path, file_name))
    samples, weights = history.get_distribution(m=0, t=None)
    kde = scipy.stats.gaussian_kde(samples.T.values.tolist(),weights=weights)
    kdesamples = kde.resample(2*size).T
    if len(samples.columns)==2:
        kdesamples = pd.DataFrame(data=kdesamples,columns=['mutation rate', '2n* fitness'])
        kdesamples = kdesamples[(kdesamples['mutation rate']>1e-9) & (kdesamples['mutation rate']<1e-5)][:size]
    else:
        kdesamples = pd.DataFrame(data=kdesamples,columns=['mutation rate', 'trisomy rate', '2n+1 fitness', '2n+1* fitness', '2n* fitness'])
        kdesamples = kdesamples[(kdesamples['mutation rate']>1e-9) & (kdesamples['mutation rate']<1e-5) &
                  (kdesamples['trisomy rate']>1e-6) & (kdesamples['mutation rate']<1e-2)][:size]
    return kdesamples, kde

def roundd(v):
    return round(v,3)
        
def findd(n):
    for i in range(0,12):
        m = n*10**i
        if m>=1 and m<=10:
            return i
    raise ValueError(n)
           
# returns (MAP, low_hdi, high_hdi)
def calc_hdi(f, hdi_p=.50):
    samples, kde = sample_from_kde(f)
    MAP = scipy.optimize.minimize(lambda x: -kde.logpdf(x) if x[0]>1e-9 and x[0]<1e-5 
                                  and x[1]>1e-6 and x[1]<1e-2 else 100000
                                  ,samples.median().values)['x']
    hdi_res = hdi(samples.values, hdi_prob=hdi_p)
    hi = hdi_res.T[1]
    lo = hdi_res.T[0]
    return (MAP, lo, hi)

def print_mode_and_hdi(f, display_cols=['\mu','\delta','w_{2n+1}','w_{2n+1^*}', 'w_{2n^*}'], hdi_p=.5):
    ans = calc_hdi(f,hdi_p)
    orders = [np.array(list(map(findd,ans))) for ans in ans]
    singles = [list(map(roundd,ans*(10**np.array(list(map(findd,ans)))))) for ans in ans]
    for a in list(zip(list(display_cols),*singles, *orders)):
        if a[-1]==0:
            print('${}={}\\ [{}-{}]$,'.format(*a))
        else:
            print('${0}={1}\\cdot10^{{-{4}}}\\ [{2}\\cdot10^{{-{5}}}-{3}\\cdot10^{{-{6}}}]$,'.format(*a))
    

import warnings
warnings.filterwarnings('ignore')

In [69]:
print_mode_and_hdi(f_no_aneuploidy, display_cols = ['\mu', 'w_{2n^*}'])

$\mu=7.98\cdot10^{-9}\ [7.907\cdot10^{-9}-8.139\cdot10^{-9}]$,
$w_{2n^*}=1.013\ [1.012-1.013]$,


In [63]:
print_mode_and_hdi(f_with_aneuploidy, display_cols = ['\mu','\delta','w_{2n+1}','w_{2n+1^*}', 'w_{2n^*}'],hdi_p=.5)

$\mu=2.986\cdot10^{-6}\ [3.734\cdot10^{-7}-3.725\cdot10^{-6}]$,
$\delta=1.724\cdot10^{-3}\ [1.451\cdot10^{-3}-2.766\cdot10^{-3}]$,
$w_{2n+1}=1.022\ [1.021-1.023]$,
$w_{2n+1^*}=1.025\ [1.023-1.026]$,
$w_{2n^*}=1.028\ [1.026-1.029]$,


In [43]:
# w1,w2,w3 = calc_hdi(f_with_aneuploidy)[0][2:]
w1,w2,w3 = 1.022, 1.025, 1.028
s = w3 -1
c = 1-(w2-w1)/(w3-1) 
b = w3-1+c
round(s,3),round(b,3),round(1-c,3)

(0.028, 0.921, 0.107)

In [71]:
print_mode_and_hdi(f_tau10, display_cols = ['\mu','\delta','w_{2n+1}','w_{2n+1^*}', 'w_{2n^*}'],hdi_p=.5)

$\mu=1.66\cdot10^{-6}\ [9.033\cdot10^{-8}-2.353\cdot10^{-6}]$,
$\delta=1.483\cdot10^{-3}\ [1.162\cdot10^{-3}-2.492\cdot10^{-3}]$,
$w_{2n+1}=1.022\ [1.021-1.023]$,
$w_{2n+1^*}=1.025\ [1.023-1.026]$,
$w_{2n^*}=1.028\ [1.026-1.029]$,


In [70]:
print_mode_and_hdi(f_tau100, display_cols = ['\mu','\delta','w_{2n+1}','w_{2n+1^*}', 'w_{2n^*}'],hdi_p=.5)

$\mu=3.84\cdot10^{-7}\ [6.53\cdot10^{-8}-6.207\cdot10^{-7}]$,
$\delta=7.636\cdot10^{-4}\ [4.753\cdot10^{-4}-1.818\cdot10^{-3}]$,
$w_{2n+1}=1.022\ [1.021-1.024]$,
$w_{2n+1^*}=1.024\ [1.023-1.026]$,
$w_{2n^*}=1.028\ [1.026-1.029]$,
