In [1]:
import os, re

import discovery as ds

import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as pp

import jax
import jax.numpy as jnp

In [314]:
# The stdlib `imp` module was removed in Python 3.12; importlib.reload is its
# replacement. Binding it as `imp` keeps any other `imp.reload(...)` call in
# this notebook working without further changes.
import importlib as imp

# Development convenience: reload the submodules first, then the package itself.
imp.reload(ds.matrix)
imp.reload(ds.signals)
imp.reload(ds.likelihood)
imp.reload(ds.prior)
imp.reload(ds.pulsar)
imp.reload(ds)

<module 'discovery' from '/Users/vallis/Documents/discovery/src/discovery/__init__.py'>

In [315]:
# Configure the discovery package with the 'numpy' setting.
# NOTE(review): presumably this selects the array backend; later cells still
# call jax.jit, so confirm this cell is meant to run in the final workflow.
ds.config('numpy')

Read pulsar data

In [2]:
psrfiles = !ls ../data
psrs = [ds.Pulsar.read_feather(f'../data/{psrfile}') for psrfile in psrfiles[:10]
        if '-J' in psrfile or '-B' in psrfile]

Choose the simulation

In [3]:
# Seed for the simulation: passed to ds.matrix.jnpkey(...) and embedded in the
# name of the output directory.
rndseed = 42

Choose the outpsr (the index into `psrs` of the pulsar singled out for the predictive analysis below)

In [4]:
# Index into `psrs` of the pulsar singled out below, and that pulsar's name
# (used as the prefix of its parameter names and of the output file names).
outpsr = 0
outpsrname = psrs[outpsr].name

How many predictive samples?

In [5]:
# Number of chain rows drawn by makeppl (via chain.sample(nsamples)).
nsamples = 1

Get the chains

In [6]:
# Chains stored as feather tables; each row is later turned into a parameter
# dict with row.to_dict().
# NOTE(review): file names suggest NANOGrav 15-yr model 2a / 3a chains — confirm provenance.
model2a = pd.read_feather('../data/NG15yr-m2a-chain.feather')
model3a = pd.read_feather('../data/NG15yr-m3a-chain.feather')

Simulate model 2a

In [321]:
# Span returned by ds.getspan for the loaded pulsars; passed as T to every Fourier GP.
Tspan = ds.getspan(psrs)

# Simulation likelihood: one PulsarLikelihood per pulsar, each with measurement
# noise, ECORR, timing model, a 30-component 'red_noise' power law, and a
# 14-component 'gw' power law whose gamma / log10_A are shared across pulsars.
cbl = ds.GlobalLikelihood([
    ds.PulsarLikelihood([
        pulsar.residuals,
        ds.makenoise_measurement(pulsar, pulsar.noisedict),
        ds.makegp_ecorr(pulsar, pulsar.noisedict),
        ds.makegp_timing(pulsar, variance=1e-14),
        ds.makegp_fourier(pulsar, ds.powerlaw, 30, T=Tspan, name='red_noise'),
        ds.makegp_fourier(pulsar, ds.powerlaw, 14, T=Tspan, name='gw',
                          common=['gw_gamma', 'gw_log10_A']),
    ])
    for pulsar in psrs
])

# Sampler used below as sample(key, params).
sample = cbl.sample

In [322]:
# Take one random row of the model-2a chain as the parameter dict for the simulation.
# NOTE(review): sample(1) is called without random_state, so the chosen row is
# not tied to `rndseed` and changes from run to run.
p0 = model2a.sample(1).iloc[0].to_dict()

In [323]:
# Override the common 'gw' power-law parameters with the values to inject;
# gw_log10_A also ends up in the output directory name.
p0['gw_gamma'] = 4.33
p0['gw_log10_A'] = -14.5

In [327]:
# Build a key from the seed and draw one simulated dataset at p0.
# `sample` returns a new key together with ysim, which is iterated
# pulsar-by-pulsar when the simulation is saved.
key = ds.matrix.jnpkey(rndseed)
key, ysim = sample(key, p0)

In [155]:
# Make the amplitude filesystem-friendly: '-' -> 'm', '.' -> 'p' (e.g. -14.5 -> 'm14p5').
# Plain str.replace avoids the invalid escape sequences '\.' and '\-' that the
# previous non-raw regex patterns contained (a SyntaxWarning on Python 3.12+).
str_log10_A = str(p0['gw_log10_A']).replace('-', 'm').replace('.', 'p')

# Output directory name encodes the model, the injected amplitude and the seed.
dirname = f'sim_model2a_{str_log10_A}_{rndseed}'

In [164]:
# Create the output directory; exist_ok=True makes the cell safe to re-run.
os.makedirs(dirname, exist_ok=True) 

In [181]:
# Overwrite each pulsar's residuals with its simulated data and save it to `dirname`.
# NOTE: this mutates the objects in `psrs` in place, so every later cell sees the
# simulated residuals; the loop also leaves `psr` bound to the last pulsar.
for psr, ypsr in zip(psrs, ysim):
    psr.residuals = np.asarray(ypsr)
    psr.save_feather(os.path.join(dirname, f'{psr.name}.feather'), noisedict = psr.noisedict)

In [None]:
# Drop the simulation likelihood and its sampler; they are not used again below.
del cbl, sample

Sample model 3a with vmap

In [7]:
# Span returned by ds.getspan for the loaded pulsars; passed as T to every Fourier GP.
Tspan = ds.getspan(psrs)

# Model 3a: per-pulsar likelihoods (noise, ECORR, timing model, 30-component
# 'red_noise') supplied as a generator, plus a global 14-component 'gw' process
# built with ds.hd_orf as the overlap reduction function.
m3a = ds.GlobalLikelihood(
    (
        ds.PulsarLikelihood([
            pulsar.residuals,
            ds.makenoise_measurement(pulsar, pulsar.noisedict),
            ds.makegp_ecorr(pulsar, pulsar.noisedict),
            ds.makegp_timing(pulsar, variance=1e-14),
            ds.makegp_fourier(pulsar, ds.powerlaw, 30, T=Tspan, name='red_noise'),
        ])
        for pulsar in psrs
    ),
    ds.makegp_fourier_global(psrs, ds.powerlaw, ds.hd_orf, 14, T=Tspan, name='gw'),
)

# Plain and jit-compiled log-likelihood callables.
logl = m3a.logL
jlogl = jax.jit(logl)

In [228]:
# Use importlib.reload: the stdlib `imp` module was removed in Python 3.12, and
# importing here means this cell does not depend on another cell having run.
import importlib

# Development convenience: reload the submodules first, then the package itself.
importlib.reload(ds.matrix)
importlib.reload(ds.signals)
importlib.reload(ds.likelihood)
importlib.reload(ds)

<module 'discovery' from '/Users/vallis/Documents/discovery/src/discovery/__init__.py'>

Conditional probability for RN

In [248]:
# Span returned by ds.getspan for the loaded pulsars; passed as T to every Fourier GP.
Tspan = ds.getspan(psrs)

# Model 2a in "all-pulsar" form: the per-pulsar likelihoods hold only noise,
# ECORR and timing model, while 'red_noise' (30 components) and the common
# 'gw' process (14 components) are supplied as global GPs.
c2a = ds.GlobalLikelihood(
    (
        ds.PulsarLikelihood([
            pulsar.residuals,
            ds.makenoise_measurement(pulsar, pulsar.noisedict),
            ds.makegp_ecorr(pulsar, pulsar.noisedict),
            ds.makegp_timing(pulsar, variance=1e-14),
        ])
        for pulsar in psrs
    ),
    [
        ds.makegp_fourier_allpsr(psrs, ds.powerlaw, 30, T=Tspan, name='red_noise'),
        ds.makegp_fourier_allpsr(psrs, ds.powerlaw, 14, T=Tspan, name='gw',
                                 common=['gw_gamma','gw_log10_A']),
    ],
)

# Conditional callable of this likelihood (evaluated on a parameter dict below).
cond = c2a.conditional

In [249]:
# Draw a value for every parameter the conditional needs (this replaces the p0
# taken from the chain earlier).
# NOTE(review): presumably uniform within each parameter's prior range — confirm.
p0 = ds.sample_uniform(cond.params)

In [250]:
# Evaluate the model-2a conditional at p0 and display the result.
cond(p0)

(Array([ 4.84484310e-07, -1.27513398e-06,  5.52733509e-08, -2.20688139e-07,
        -4.05751727e-07, -8.12314663e-07, -2.93361870e-07, -3.39583565e-11,
        -4.33668083e-07, -3.18686573e-07, -5.45891738e-07, -3.96501286e-08,
        -4.30421622e-07,  1.36575678e-07, -3.32272633e-07, -2.03202523e-09,
        -3.73466605e-07,  1.86148489e-07, -1.61370134e-07,  2.88355056e-07,
        -1.14155324e-07,  9.31001540e-08, -1.28406814e-07,  2.31633010e-07,
         2.84128251e-08,  1.74471132e-07,  1.90220438e-08,  4.76933499e-08,
         6.31238482e-09,  1.12766784e-07,  4.19697269e-08,  1.15715388e-08,
        -6.07233555e-09,  2.16169076e-08,  6.57022176e-09,  1.73040845e-08,
        -7.69248168e-09, -9.70066390e-09,  2.40620937e-09, -4.04423556e-09,
        -1.74872734e-09,  3.03877467e-09, -9.35881283e-09,  3.64973946e-09,
        -5.10885958e-09,  5.25237917e-09, -2.95190883e-09,  4.06005679e-09,
        -1.32557191e-09,  8.40793633e-10, -1.77773728e-09,  2.16439435e-10,
        -9.1

In [245]:
# Span returned by ds.getspan for the loaded pulsars; passed as T to every Fourier GP.
Tspan = ds.getspan(psrs)

# Model 3a in "all-pulsar" form: per-pulsar likelihoods hold only noise, ECORR
# and timing model; the global GPs are the 30-component 'red_noise' and the
# 14-component 'gw' process built with ds.hd_orf.
c3a = ds.GlobalLikelihood(
    (
        ds.PulsarLikelihood([
            pulsar.residuals,
            ds.makenoise_measurement(pulsar, pulsar.noisedict),
            ds.makegp_ecorr(pulsar, pulsar.noisedict),
            ds.makegp_timing(pulsar, variance=1e-14),
        ])
        for pulsar in psrs
    ),
    [
        ds.makegp_fourier_allpsr(psrs, ds.powerlaw, 30, T=Tspan, name='red_noise'),
        ds.makegp_fourier_global(psrs, ds.powerlaw, ds.hd_orf, 14, T=Tspan, name='gw'),
    ],
)

# Conditional callable of this likelihood (evaluated on a parameter dict below).
cond = c3a.conditional

In [246]:
# Draw a value for every parameter the model-3a conditional needs.
# NOTE(review): presumably uniform within each parameter's prior range — confirm.
p0 = ds.sample_uniform(cond.params)

In [247]:
# Evaluate the model-3a conditional at p0 and display the result.
cond(p0)

(Array([ 3.99582424e-07, -2.11087947e-06, -1.85151603e-07, -5.82081365e-07,
        -8.17459519e-07, -1.28211855e-06, -6.29352433e-07, -3.37878105e-07,
        -9.78031190e-07, -5.10611216e-07, -1.01808209e-06, -1.67771070e-07,
        -9.34929838e-07,  1.56546192e-07, -7.37507922e-07,  1.28624199e-07,
        -7.86074063e-07,  4.02807930e-07, -4.12296751e-07,  5.39048328e-07,
        -4.23061301e-07,  3.97919066e-07, -2.24449260e-07,  5.72725627e-07,
        -4.66927132e-08,  5.43498215e-07,  5.69036044e-08,  2.81256318e-07,
         4.22390610e-08,  4.69022909e-07,  2.38990479e-07,  1.21452665e-07,
         6.65845420e-08,  2.47025133e-07,  1.74888856e-07,  8.26667880e-08,
         1.24998800e-07,  5.66811016e-08,  5.00956147e-08, -1.53697388e-08,
         1.85630379e-07,  4.30239296e-08, -6.52925571e-08, -3.92124254e-08,
         5.08955366e-08,  1.73497787e-08, -4.01509076e-08,  3.76220432e-08,
         4.09941284e-08, -5.05844542e-08, -5.12216839e-08,  3.65514481e-08,
        -4.6

Model-3a conditional predictive 

In [330]:
# Span returned by ds.getspan for the loaded pulsars; passed as T to every Fourier GP.
Tspan = ds.getspan(psrs)

# Replacement measurement noise for the out-pulsar, with fixed efac = 1 and
# log10_t2equad = 0.
outnoise = ds.makenoise_measurement_simple(
    psrs[outpsr],
    {f'{outpsrname}_efac': 1.0,
     f'{outpsrname}_log10_t2equad': 0.0},
)

# Model-3a likelihood in which only the out-pulsar uses `outnoise`; every other
# pulsar keeps the measurement noise built from its own noisedict.
gbl = ds.GlobalLikelihood(
    [
        ds.PulsarLikelihood([
            pulsar.residuals,
            ds.makenoise_measurement(pulsar, pulsar.noisedict) if ipsr != outpsr else outnoise,
            ds.makegp_ecorr(pulsar, pulsar.noisedict),
            ds.makegp_timing(pulsar, variance=1e-14),
            ds.makegp_fourier(pulsar, ds.powerlaw, 30, T=Tspan, name='red_noise'),
        ])
        for ipsr, pulsar in enumerate(psrs)
    ],
    ds.makegp_fourier_global(psrs, ds.powerlaw, ds.hd_orf, 14, T=Tspan, name='gw'),
)

# Conditional callable that makeppl evaluates for each chain sample.
cond = gbl.conditional

In [13]:
# check that we have all the parameters
# [p for p in cond.params if p not in chain.columns]

outpsr model3a likelihood 

In [293]:
# The out-pulsar; later cells read this notebook-level name as well.
psr = psrs[outpsr]

# Single-pulsar likelihood for the out-pulsar in which the 'gw' process enters
# through a delay term and a variance term instead of a power-law GP.
spl3 = ds.PulsarLikelihood([
    psr.residuals,
    ds.makenoise_measurement(psr, psr.noisedict),
    ds.makegp_ecorr(psr, psr.noisedict),
    ds.makegp_timing(psr, variance=1e-14),
    ds.makegp_fourier(psr, ds.powerlaw, 30, T=Tspan, name='red_noise'),
    ds.makegp_fourier_delay(psr, 14, T=Tspan, name='gw'),
    ds.makegp_fourier_variance(psr, 14, T=Tspan, name='gw'),
])

# Plain and jit-compiled log-likelihood callables.
logl3 = spl3.logL
jlogl3 = jax.jit(logl3)

In [294]:
# Block of 28 consecutive entries belonging to pulsar number `outpsr`.
# NOTE(review): 28 is presumably 2 x 14 'gw' Fourier components (sine + cosine) — confirm.
outpsrrange  = slice(outpsr*28, (outpsr+1)*28)

# Parameter-dict keys for the out-pulsar's 'gw' mean vector and variance matrix.
outpsrmean = f'{outpsrname}_gw_mean(28)'
outpsrvar  = f'{outpsrname}_gw_variance(28,28)'

# Parameter-dict keys for the out-pulsar's red-noise power-law parameters.
outpsrgamma  = f'{outpsrname}_red_noise_gamma'
outpsrlog10A = f'{outpsrname}_red_noise_log10_A'

In [298]:
def makeppl(logl, chain, conditional=True, accelerate=True):
    """Integrate the out-pulsar likelihood over its red-noise parameters for chain draws.

    For each of `nsamples` rows drawn from `chain`, the row is turned into a
    parameter dict and the likelihood is integrated numerically over the
    out-pulsar's red-noise gamma in [0, 7] and log10_A in [-20, -11].

    Relies on notebook-level names: nsamples, cond, psrs, outpsr, Tspan,
    outpsrrange, outpsrmean, outpsrvar, outpsrgamma, outpsrlog10A.

    Parameters
    ----------
    logl : callable
        Maps a parameter dict to a log-likelihood value. Ignored when both
        `conditional` and `accelerate` are true, because a dedicated
        likelihood is then built for every draw.
    chain : pandas.DataFrame
        Table of posterior samples; must contain a 'gw_log10_A' column.
    conditional : bool
        If true, evaluate `cond` on the draw and add the out-pulsar's block of
        its result to the parameter dict under `outpsrmean` / `outpsrvar`.
    accelerate : bool
        Only used when `conditional` is true: subtract the 'gw' delay from the
        residuals once and jit a likelihood with the 'gw' variance fixed to
        the draw's values.

    Returns
    -------
    pandas.DataFrame
        Indexed by the drawn rows' labels, with columns 'gw_gamma' (NaN when
        absent from the chain), 'gw_log10_A' and 'logl' (log of the integral).
    """
    index, records = [], []
    for i, row in chain.sample(nsamples).iterrows():
        par = row.to_dict()

        # Likelihood used for this draw; kept separate from the `logl` argument
        # so the accelerated branch never overwrites what the caller passed in.
        draw_logl = logl

        if conditional:
            # cond returns a pair: a stacked vector and a factor accepted by cho_solve.
            cm, cl = cond(par)

            # Fix: the slice is defined in the notebook as `outpsrrange`; the
            # previous name `outrange` does not exist and raised NameError.
            par[outpsrmean] = cm[outpsrrange]
            par[outpsrvar] = ds.matrix.jsp.linalg.cho_solve(
                cl, jnp.identity(len(cm))[:, outpsrrange])[outpsrrange, :]

            if accelerate:
                # Look the out-pulsar up explicitly instead of relying on the
                # notebook-level `psr`, which other cells rebind.
                outpulsar = psrs[outpsr]

                # could cast this in PulsarLikelihood
                ym = np.array(ds.makegp_fourier_delay(outpulsar, 14, T=Tspan, name='gw')(par))
                outpsr_likelihood = ds.PulsarLikelihood([
                    outpulsar.residuals - ym,
                    ds.makenoise_measurement(outpulsar, outpulsar.noisedict),
                    ds.makegp_ecorr(outpulsar, outpulsar.noisedict),
                    ds.makegp_timing(outpulsar, variance=1e-14),
                    ds.makegp_fourier(outpulsar, ds.powerlaw, 30, T=Tspan, name='red_noise'),
                    ds.makegp_fourier_variance(outpulsar, 14, T=Tspan, name='gw', noisedict=par),
                ])

                draw_logl = jax.jit(outpsr_likelihood.logL)

        def ufunc(gamma, log10_A):
            # Log-likelihood of this draw at the given red-noise parameters.
            return float(draw_logl({**par, outpsrgamma: gamma, outpsrlog10A: log10_A}))

        # Reference value subtracted before exponentiating, to keep exp() in range.
        norm = ufunc(4.0, -14.0)

        def nfunc(gamma, log10_A):
            return np.exp(ufunc(gamma, log10_A) - norm)

        index.append(i)
        records.append({'gw_gamma': par.get('gw_gamma', np.nan),
                        'gw_log10_A': par['gw_log10_A'],
                        # note func(y, x), with limits given for x, y:
                        # x = log10_A in [-20, -11], y = gamma in [0, 7]
                        'logl': norm + np.log(sp.integrate.dblquad(nfunc, -20, -11, 0, 7)[0])})

    return pd.DataFrame(records, index=index)

In [301]:
# Timed model-3a run on the conditional + accelerated path (makeppl then builds
# its own likelihood per draw, so logl3 is not the function that gets evaluated).
%time df = makeppl(logl3, model3a, conditional=True, accelerate=True)

CPU times: user 2.94 s, sys: 33.6 ms, total: 2.97 s
Wall time: 1.69 s


In [289]:
# Same timed call as the previous cell, kept from an earlier execution.
%time df = makeppl(logl3, model3a, conditional=True, accelerate=True)

CPU times: user 2.21 s, sys: 36.5 ms, total: 2.25 s
Wall time: 1.23 s


In [None]:
# Timed model-3a run passing the jitted likelihood.
# NOTE(review): accelerate defaults to True, so with conditional=True makeppl
# builds its own likelihood per draw and jlogl3 is not used — pass
# accelerate=False if jlogl3 is meant to be evaluated.
%time df = makeppl(jlogl3, model3a, conditional=True)

In [None]:
# Save the model-3a result table under the out-pulsar's name.
df.to_feather(f'{outpsrname}_ppl_model3a.feather')

In [None]:
# Drop the model-3a single-pulsar likelihood objects before building model 2a.
del spl3, logl3, jlogl3

outpsr model2a likelihood 

In [210]:
# Single-pulsar model-2a likelihood for the notebook-level `psr`: noise, ECORR,
# timing model, a 30-component 'red_noise' and a 14-component 'gw' power law
# whose gw_log10_A / gw_gamma use the common (un-prefixed) parameter names.
spl2 = ds.PulsarLikelihood([
    psr.residuals,
    ds.makenoise_measurement(psr, psr.noisedict),
    ds.makegp_ecorr(psr, psr.noisedict),
    ds.makegp_timing(psr, variance=1e-14),
    ds.makegp_fourier(psr, ds.powerlaw, 30, T=Tspan, name='red_noise'),
    ds.makegp_fourier(psr, ds.powerlaw, 14, T=Tspan, name='gw',
                      common=['gw_log10_A', 'gw_gamma']),
])

# Plain and jit-compiled log-likelihood callables.
logl2 = spl2.logL
jlogl2 = jax.jit(logl2)

In [211]:
# Timed model-2a run: conditional=False, so logl2 is evaluated directly on each
# chain draw. This overwrites the `df` produced by the model-3a run.
%time df = makeppl(logl2, model2a, conditional=False)

CPU times: user 187 ms, sys: 336 ms, total: 523 ms
Wall time: 300 ms


In [None]:
# Save the model-2a result table under the out-pulsar's name.
df.to_feather(f'{outpsrname}_ppl_model2a.feather')