In [2]:
import collections
import itertools

# Our numerical workhorses
import numpy as np
import pandas as pd
import scipy.optimize
import scipy.stats as st

# Numerical differentiation packages
import numdifftools as ndt

# Our main MCMC package
import emcee

# Import pyplot for plotting
import matplotlib.pyplot as plt

# Seaborn, useful for graphics
import seaborn as sns

# Corner is useful for displaying MCMC results
import corner

# Magic function to make matplotlib inline; other style specs must come AFTER
%matplotlib inline

# This enables high res graphics inline (only use with static plots (non-Bokeh))
# SVG is preferred, but there is a bug in Jupyter with vertical lines
%config InlineBackend.figure_formats = {'png', 'retina'}

# JB's favorite Seaborn settings for notebooks
rc = {'lines.linewidth': 2, 
      'axes.labelsize': 18, 
      'axes.titlesize': 18, 
      'axes.facecolor': 'DFDFE5'}
sns.set_context('notebook', rc=rc)
sns.set_style('darkgrid', rc=rc)

## 1A
Reversal in worms

In [None]:
# Load DataFrame
df = pd.read_csv('data/bi1x_2015_c_elegans_optogenetics.csv',
                comment='#')
df = pd.melt(df, id_vars=['Group', 'Worm'], value_vars=['WT', 'AVA', 'ASH'], 
             value_name='reversal', var_name='strain')


In [None]:
def revs_trials(df, strain):
    """
    Return number of reversals and number of trials.
    """
    inds = (df['strain'] == strain) & (df['reversal'] >= 0)
    n_r = df[inds]['reversal'].sum()
    n = df[inds]['reversal'].count()
    
    return n_r, n

def log_posterior(p, n_r, n):
    """
    Log posterior of reversal measurements.
    """

    # Zero probability of having p < 0 or p > 1
    if p < 0 or p > 1:
        return -np.inf
    
    return st.nbinom.logpmf(n_r, n, p).sum()




    


In [None]:
n_dim = 1        # number of parameters in the model (just p)
n_walkers = 50   # number of MCMC walkers
n_burn = 1000     # "burn-in" period to let chains stabilize
n_steps = 5000   # number of MCMC steps to take after burn-in


n_r, n = revs_trials(df, 'AVA')
p0 = np.empty((n_walkers, n_dim))
p0[:,0] = np.random.uniform(0,1, n_walkers)             # AVA
#p0[:,1] = np.random.uniform(0, 1, n_walkers)             # ASH

sampler = emcee.EnsembleSampler(n_walkers, n_dim, log_posterior, 
                                args=(n,n_r), threads=4)

# Do burn-in
pos, prob, state = sampler.run_mcmc(p0, n_burn, storechain=False)

In [None]:
#actually do MCMC
# Sample again, starting from end burn-in state
_ = sampler.run_mcmc(pos, n_steps)

In [None]:

# Get the index of the most probable parameter
max_ind = np.argmax(sampler.flatlnprobability)
#max_ind

# Pull out values.
AVA_MAP = sampler.flatchain[max_ind]
AVA_MAP
# Print the results
#print("""
#Most probable parameter value:
#p: {1:.1f}
#""".format(p_MAP))

In [None]:
n_dim = 1        # number of parameters in the model (n_r and p)
n_walkers = 50   # number of MCMC walkers
n_burn = 1000     # "burn-in" period to let chains stabilize
n_steps = 5000   # number of MCMC steps to take after burn-in

n_r, n = revs_trials(df, 'ASH')
p0 = np.empty((n_walkers, n_dim))


p0[:,0] = np.random.uniform(0,1, n_walkers)             # ASH


sampler = emcee.EnsembleSampler(n_walkers, n_dim, log_posterior, 
                                 args=(n,n_r), threads=4)

# Do burn-in
pos, prob, state = sampler.run_mcmc(p0, n_burn, storechain=False)

In [None]:
#actually do MCMC
# Sample again, starting from end burn-in state
_ = sampler.run_mcmc(pos, n_steps)

In [None]:
# Get the index of the most probable parameter
max_ind = np.argmax(sampler.flatlnprobability)
#max_ind

# Pull out values.
ASH_MAP = sampler.flatchain[max_ind]
ASH_MAP

In [None]:
AVA_MAP-ASH_MAP

## 1B
Dorsal gradients in wt vs venus fusion

In [3]:
# Load data
df = pd.read_csv('data/reeves_gradient_width_various_methods.csv', comment='#',header=[0,1])

# Check it out
df.head()

Unnamed: 0_level_0,wt,wt,dl1/+; dl-venus/+,dl1/+; dl-venus/+,dl1/+; dl-venus/+,dl1/+; dl-gfp/+,dl1/+; dl-gfp/+,dl1/+; dl-gfp/+
Unnamed: 0_level_1,wholemounts,cross-sections,anti-Dorsal,anti-Venus,Venus (live),anti-Dorsal,anti-GFP,GFP (live)
0,0.1288,0.1327,0.1482,0.1632,0.1666,0.2248,0.2389,0.2412
1,0.1554,0.1457,0.1503,0.1671,0.1753,0.1891,0.2035,0.1942
2,0.1306,0.1447,0.1577,0.1704,0.1705,0.1705,0.1943,0.2186
3,0.1413,0.1282,0.1711,0.1779,,0.1735,0.2,0.2104
4,0.1557,0.1487,0.1342,0.1483,,0.2135,0.256,0.2463


In [4]:
crosssection= df['wt']['cross-sections'].dropna()
venus_dorsal= df['dl1/+; dl-venus/+']['anti-Dorsal'].dropna()

# If we assume that the two distributions are independent,
From equation 3.87a in Sivia, the posterior should be the following
where z is the differnce in the spread between to two distributions

\begin{align}
P(Z, \mid D, I) = \frac{1}{\sigma_z \sqrt{2 \pi}}
\exp [-\frac{(Z-z_o)^2}{2\sigma_z^2}]
\end{align}




    


In [54]:


def log_post(a,b,Z):
    """
    Compute log of posterior for single set of parameters.

    """
    sigma_z = np.sqrt((np.std(a)**2) + (np.std(b)**2))
    z_0 = np.average(a)-np.average(b)
    
    return np.log(1/(sigma_z)*np.sqrt(2*np.pi))-(((Z-z_0)**2)/(2*(sigma_z**2)))

In [55]:
n_dim = 1        # number of parameters in the model (just p)
n_walkers = 50   # number of MCMC walkers
n_burn = 1000     # "burn-in" period to let chains stabilize
n_steps = 5000   # number of MCMC steps to take after burn-in


p0 = np.empty((n_walkers, n_dim))
p0[:,0] = np.random.uniform(-1,1, n_walkers)             # Z

sampler = emcee.EnsembleSampler(n_walkers, n_dim, log_post, 
                                args=(venus_dorsal,crosssection), threads=4)

# Do burn-in
pos, prob, states, blobs = sampler.run_mcmc(p0, n_burn, storechain=False)

In [56]:
#actually do MCMC
# Sample again, starting from end burn-in state
_ = sampler.run_mcmc(pos, n_steps)

In [57]:

# Get the index of the most probable parameter
max_ind = np.argmax(sampler.flatlnprobability)


# Pull out values.
delta_MAP = sampler.flatchain[max_ind]

delta_MAP

array([ 0.29318971])

## Conclusions
The MAP value for delta is about .29