In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy.io as sio

import os, sys
# Put the parent of the current working directory on the import path
# (presumably so the project-local modules imported below resolve — confirm).
notebook_dir = os.path.abspath('')
root_dir = os.path.dirname(notebook_dir)
if root_dir not in sys.path:
    sys.path.append(root_dir)

In [2]:
import pymc3 as pm
import arviz as az
import theano.tensor as tt

In [3]:
from inference import *
from DM_theory import *
from empirical.readData import *
from empirical.model import *

In [6]:
def run_inference(
    filePath='../../data/BuscheLab/2P_data.xlsx',
    tune=20000,
    draws=10000,
    include_silent=False,
    loadPath=None,
    savePath=None,
):
    """Create an ``Inference`` object, load the empirical data and run the model on it.

    Parameters
    ----------
    filePath
        Forwarded to ``Inference.load_data`` as ``filePath``.
    tune, draws
        Forwarded to ``Inference.run_on_data``.
    include_silent
        Forwarded to ``Inference.load_data``.
    loadPath
        Forwarded to ``Inference.run_on_data``.
    savePath
        Accepted but not used anywhere in this function.
        NOTE(review): presumably meant to be handed to ``run_on_data`` —
        confirm against its signature before wiring it through.

    Returns
    -------
    The ``Inference`` instance after ``load_data``, ``set_model('selfcon')``
    and ``run_on_data`` have been called on it.
    """
    inference = Inference()

    load_options = dict(filePath=filePath, include_silent=include_silent)
    inference.load_data('empirical', **load_options)

    inference.set_model('selfcon')

    run_options = dict(tune=tune, draws=draws, loadPath=loadPath)
    inference.run_on_data(**run_options)

    return inference

# Build the inference object with include_silent=True; loadPath names a stored
# trace file (presumably loaded instead of re-sampling — confirm in Inference.run_on_data).
I = run_inference(include_silent=True,loadPath='results_xls_trace.nc')

expanding @ lvl animal with keys: ['20180325 (app ko7 739)' '20180325A (app ko8 740)'
 '20180402A (app ko13 734)' '20180419 (app ko14 756)'], selectors: ('LM (APLP1 KO)',)
expanding @ lvl animal with keys: ['20180310A' '20180310B' '20180312A' '20180401'], selectors: ('WT',)
zeros in data: 1480


  keys = np.unique(list(data[selector].columns.get_level_values(this_level)))
  keys = np.unique(list(data[selector].columns.get_level_values(this_level)))


In [11]:
from scipy.stats import binom
from scipy.integrate import quad

import theano
import theano.tensor as T
from theano.graph.op import Op
from theano.graph.basic import Apply

class integrateOut(Op):
    """
    Integrate out a variable from an expression, computing
    the definite integral w.r.t. the variable specified
    !!! Only implemented in this for scalars !!!


    Parameters
    ----------
    f : scalar
        input 'function' to integrate
    t : scalar
        the variable to integrate out
    t0: float
        lower integration limit
    tf: float
        upper integration limit

    Returns
    -------
    scalar
        a new scalar with the 't' integrated out

    Notes
    -----

    function taken from https://stackoverflow.com/questions/42678490/custom-theano-op-to-do-numerical-integration

    The integrand is compiled into a theano function once per set of inputs
    (on the first call to ``perform``) and reused for later evaluations.

    usage of this looks like:
    x = T.dscalar('x')
    y = T.dscalar('y')
    t = T.dscalar('t')

    z = (x**2 + y**2)*t

    # integrate z w.r.t. t as a function of (x,y)
    intZ = integrateOut(z,t,0.0,5.0)(x,y)
    gradIntZ = T.grad(intZ,[x,y])

    funcIntZ = theano.function([x,y],intZ)
    funcGradIntZ = theano.function([x,y],gradIntZ)

    """
    def __init__(self,f,t,t0,tf,*args,**kwargs):
        super(integrateOut,self).__init__()
        self.f = f
        self.t = t
        self.t0 = t0
        self.tf = tf
        # Compiled integrand, built lazily by perform().
        self._integrand = None

    def make_node(self,*inputs):
        """Bind the Op to `inputs` (the free variables of ``f`` other than ``t``)."""
        self.fvars=list(inputs)
        # A new set of inputs invalidates any previously compiled integrand.
        self._integrand = None
        # The symbolic gradient of the integrand is needed by grad().  Building
        # it can fail (e.g. for the Ops created inside grad() itself); that is
        # tolerated here and only reported if grad() is actually requested.
        try:
            self.gradF = T.grad(self.f,self.fvars)
        except Exception:
            self.gradF = None
        return Apply(self,self.fvars,[T.dscalar().type()])

    def perform(self,node, inputs, output_storage):
        """Numerically integrate ``f`` over ``t`` in [t0, tf] at the given input values."""
        # Compile the integrand only once; recompiling on every evaluation
        # dominates the runtime when the Op is evaluated repeatedly.
        if getattr(self,'_integrand',None) is None:
            self._integrand = theano.function([self.t]+self.fvars,self.f)
        # Everything else is an argument to the quad function
        args = tuple(inputs)
        # actually compute the integral (quad returns (value, abs_error))
        value = quad(self._integrand,self.t0,self.tf,args=args)[0]
        # Store an ndarray of the declared output dtype rather than a bare float.
        output_storage[0][0] = np.asarray(value,dtype=node.outputs[0].dtype)

    def grad(self,inputs,grads):
        """Gradient w.r.t. each input: the integral of the integrand's partial derivative."""
        if self.gradF is None:
            raise TypeError(
                'integrateOut: the gradient of the integrand could not be '
                'constructed in make_node, so this Op cannot be differentiated'
            )
        return [integrateOut(g,self.t,self.t0,self.tf)(*inputs)*grads[0] \
            for g in self.gradF]


In [12]:
import math
from theano import pp

class binomial(Op):
    """
    Theano Op evaluating the binomial probability mass function at fixed
    ``N`` and ``k`` as a function of the success probability.

    Parameters
    ----------
    N
        number of trials, passed to ``scipy.stats.binom.pmf`` as ``n``
    k
        number of successes at which the pmf is evaluated

    Notes
    -----
    Only the first input of the node is used (as the probability ``p``).
    The gradient divides by ``p * (1 - p)`` and is therefore not finite
    at ``p == 0`` or ``p == 1``.

    usage of this looks like:
    p = T.dscalar('p')
    prob = binomial(N,k)(p)
    """

    def __init__(self,N,k,*args,**kwargs):
        super(binomial,self).__init__()

        self.N = N
        self.k = k

    def make_node(self,*inputs):
        """Bind the Op to its inputs; the output is declared as a double scalar."""
        self.fvars=list(inputs)
        return Apply(self,self.fvars,[T.dscalar().type()])

    def perform(self,node,inputs,output_storage):
        """Evaluate ``binom.pmf(k, N, p)`` for the numeric value of ``p``."""
        p = inputs[0]

        # Store an ndarray of the declared output dtype rather than a numpy scalar.
        output_storage[0][0] = np.asarray(binom.pmf(self.k,self.N,p),dtype=node.outputs[0].dtype)

    def grad(self,inputs,grads):
        """Gradient of the pmf w.r.t. ``p``, chained with the output gradient."""
        p = inputs[0]
        # d/dp [ C(N,k) p^k (1-p)^(N-k) ] = pmf(k; N, p) * (k - N*p) / (p * (1-p))
        dpmf_dp = binomial(self.N,self.k)(p) * (self.k-self.N*p)/(p * (1-p))
        # The Op has a single output, so only grads[0] is relevant.
        return [dpmf_dp * grads[0]]

In [9]:
def logp_fun(data,gamma,delta,nu_max):
    """
    Elementwise log-probability expression of `data` given the model parameters.

    Parameters
    ----------
    data
        observed values; divided elementwise by `nu_max` before taking the log
    gamma, delta, nu_max
        model parameters (in the notebook these are the pymc3 priors,
        passed by keyword)

    Returns
    -------
    theano expression with one log-probability per entry of `data`

    Notes
    -----
    * Entries with ``data == 0`` are not special-cased: ``log(data / nu_max)``
      is ``-inf`` there, so the returned expression is presumably non-finite
      for those entries.  TODO: handle them via the silent-probability terms
      constructed at the top of the function, which do not enter the result yet.
    * NOTE(review): `nu`, `T_total`, `N_total` and `p_nu` are not defined in
      this cell; they have to exist in the notebook namespace — confirm.
    """
    # Probability of observing no activity: p_nu weighted by exp(-nu*T_total),
    # integrated over nu from 0 to 10.
    p_silent_distr = p_nu(nu,gamma,delta,nu_max)*np.exp(-nu*T_total)#*poisson(nu,N_AP,T_total)
    p_silent = integrateOut(p_silent_distr,nu,0,10.)(gamma,delta,nu_max,T_total)#,N_AP,T_total)

    # Binomial pmf at k=10 out of N_total with success probability p_silent
    # (currently unused in the returned expression).
    p_N_AP = binomial(N_total,10)(p_silent)

    scaled_NU = tt.log(data / nu_max)
    return - tt.log( nu_max / gamma * tt.sqrt( -np.pi * scaled_NU ) ) - delta**2 / 2 + \
        ( gamma**2 - 1 ) * scaled_NU + \
        tt.log( tt.cosh( gamma * delta * tt.sqrt( -2 * scaled_NU ) ) )

In [7]:
# Observed values: the entries of I.data selected by I.data_mask.
data_observed = I.data[I.data_mask]
#silent = tt.le(data_observed,1./600)
with pm.Model() as model:
    # TODO: replace normals with student-t distributions

    # One hierarchical prior per model parameter, restricted with the same
    # mask that was used to select the observed data.
    priors = {}
    for para in I.paras:
        hierarchical_prior = I.construct_model_hierarchical(para)
        priors[para] = hierarchical_prior[I.data_mask]


    def likelihood(data):
        """Summed log-probability of `data` under the current priors."""

        # TODO: introduce checks for consistency, etc
        elementwise_logp = logp_fun(data,**priors)

        # Ideas for dealing with NaN entries (e.g. log of a negative number),
        # none of them active:
        #   tt.switch(tt.isnan(logP), 0, logP)
        #   tt.switch(silent, logP-10., logP)
        #   tt.switch(tt.isnan(logP), 2*min(logP), logP)

        # NOTE(review): the variable returned by the Print op is discarded, so
        # it is not part of the returned graph — confirm this is intended.
        tt.printing.Print('logP')(elementwise_logp)

        return tt.sum(elementwise_logp)

    ## watch out: for some reason, NaNs in observed data are converted to 0s
    logP = pm.DensityDist('logP',likelihood,observed=data_observed)

    # Sampling is currently disabled.  `draws`, `tune`, `kwargs` and `savePath`
    # are not defined in this cell and must be provided before re-enabling.
    #trace = pm.sample(
    #    init='adapt_diag',
    #    step=pm.Metropolis(),
    #    chains=4,draws=draws,tune=tune,
    #    return_inferencedata=True,
    #    **kwargs)

    #if savePath:
    #    trace.to_netcdf(savePath)

    #I.trace = trace

name: gamma
{'mu': 1.5, 'sigma': 1.0, 'sigma_animal': 1.0, 'prior': 'Normal'}
prior animal (final) __str__ = [370  15]
name: delta
{'mu': 4.0, 'sigma': 2.0, 'sigma_animal': 1.0, 'prior': 'Normal'}
prior animal (final) __str__ = [370  15]
name: nu_max
{'mu': 60.0, 'sigma': 20.0, 'sigma_animal': 5.0, 'prior': 'Normal'}
prior animal (final) __str__ = [370  15]
logP __str__ = [ 0.21456278  0.20072879  0.2143889  ... -3.56940472 -3.6462295
 -3.79297369]
logP __str__ = [ 0.21456278  0.20072879  0.2143889  ... -3.56940472 -3.6462295
 -3.79297369]
logP __str__ = [ 0.21456278  0.20072879  0.2143889  ... -3.56940472 -3.6462295
 -3.79297369]


NameError: name 'trace' is not defined