# Attributions

**Chris:** Bulk of problem

**Emily:** Added a few lines of explanation

**Naveen:** Looked over final answer

In [1]:
import glob
import os

import numpy as np
import pandas as pd
import scipy.signal
import scipy

# Image processing tools
import skimage
import skimage.io

# MCMC tools
import pymc3 as pm
import theano.tensor as tt
import theano

import bebi103

import bokeh
bokeh.io.output_notebook()

import ipywidgets

Let's load in the image data and take a look at the number of frames.

In [2]:
# Load in TIFF stack
fname = '../data/goehring_FRAP_data/PH_138_A.tif'
ic = skimage.io.ImageCollection(fname, conserve_memory=False)[0]

# How long is it?
print('There are {0:d} frames.'.format(len(ic)))

There are 149 frames.


Taking constants from the README file:

In [3]:
# Image resolution in um/px
RES = .138

# Bleach time (first frame)
FIRST_BLEACH = 21

# Time interval between frames
T_INTERVAL = 0.188

Let's load the images and use the interactive clicking to locate the vertices of the ROIs. We have recorded them after clicking, for our convenience, and cropped the images.

In [4]:
# Time intervals of each TIFF stack
times = np.linspace(0, (len(ic) - 1) * T_INTERVAL, len(ic)) - (FIRST_BLEACH - 1) * T_INTERVAL

# Load images
data_loc = '../data/goehring_FRAP_data'
im_glob = os.path.join(data_loc, '*.tif')
im_list = glob.glob(im_glob)

imgs = skimage.io.ImageCollection(im_list, conserve_memory=False)

# for img in imgs:
#     bokeh.io.show(bebi103.viz.imshow(img[20], flip=False, record_clicks=True))

# Vertices from photobleached squares

roiVerts = [np.array([[16.7912, 10.7605], [13.9775, 59.7350], [66.6332, 60.1005], [68.6430, 11.8569]]),
            np.array([[45.3298, 78.0776], [96.3777, 76.6156], [96.3777, 25.8138], [46.5356, 27.2757]]),
            np.array([[50.5551, 66.4507], [99.5933, 63.8923], [97.9855, 15.2833], [50.1532, 17.1107]]),
            np.array([[58.9961, 97.9506], [103.6128, 97.9506], [108.8382, 50.0726], [61.8098, 49.7071]]),
            np.array([[19.6048, 111.1764], [69.4469, 108.9836], [72.6625, 60.0091], [22.4185, 59.6436]]),
            np.array([[15.1834, 104.6663], [64.6235, 105.3973], [67.8391, 56.0573], [17.5951, 54.9609]]),
            np.array([[15.9873, 112.0445], [67.0352, 110.9480], [70.6528, 64.8974], [20.8107, 60.5116]]),
            np.array([[21.2127, 104.0724], [70.6528, 104.0724], [74.6723, 57.6563], [25.2322, 56.5599]])]

roi = [bebi103.image.verts_to_roi(verts, *imgs[0][0].shape)[1] for verts in roiVerts]

# Now get the cropped images based on the ROI
cropped = []

for index, img in enumerate(imgs):
    cropped.append(np.array([frame[roi[index]] for frame in img]))

We'll now calculate the average fluorescence and normalize it for comparison.

In [5]:
# Now find the average fluorescence of each frame of each TIFF stack
avg_fluor = [np.mean(image, axis=(1, 2)) for image in cropped]

# Normalize each stack according to mean fluorescence before bleaching
norm_fluor = [stack / np.mean(stack[:FIRST_BLEACH-1]) for stack in avg_fluor]

# Plot our normalized fluorescences
plots = []
names = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']
    
for index, stack in enumerate(norm_fluor):
    p = bokeh.plotting.figure(plot_width=500,
                              plot_height=400,
                              x_axis_label='Time (s)',
                              y_axis_label='Fluorescence (AU)',
                              title=names[index])
    p.circle(times, stack)
    plots.append(p)
    
bokeh.io.show(bokeh.layouts.gridplot(plots, ncols=2))

We can see that after photobleaching, the normalized fluorescence drops to around 0.2 to 0.3. When they recover, they all plateau around 0.9 to 1. None of them are noticeably extremely worse or extremely better at recovery, in terms of the slope (speed of recovery ~same).

**b)** Now we'll try using MCMC for parameter estimation of D and k_off. We do this by first modeling each experiment as independent and assuming that the difference between our modeled normalized mean fluorescence and the experimental normalized mean fluorescence is normally distributed (central limit theorem). It doesn't matter so much whether we use uniform or Jeffreys, as sated in a number of previous homeworks. However, since don't care about $f_f$ and $f_b$ and they're proportions/fractions, we'll use a uniform distribution, while $D, k_{off}, \sigma$ will be Jeffreys, as they all affect scaling.

First, we'll write a function to calculate the theoretical mean fluorescence:

In [6]:
def thr_fluor(t, D, k_off, ff, fb, di):
    """ Calculate the theoretical normalized fluorescence at that time """
    
    # Initialize product of psi functions
    psi = 1
    
    # Calculate psi
    for dim in di:
        psi *= ( dim / 2 * pm.math.erf(dim / tt.sqrt(4 * D * t)) - 
                 tt.sqrt(D * t / np.pi) * (1 - tt.exp(- dim ** 2 / (4 * D * t))) )
        
    # Calculate final value for I_norm(t)
    return ff * (1 - psi * fb * (4 * tt.exp(-k_off * t)) / (pm.math.prod(dim)))
               
def sample_fluor(times, norm_fluor):
    """ Sample the normalized fluorescence values and find the most common parameters """
    
    with pm.Model() as norm_model:
        # Priors
        D = bebi103.pm.Jeffreys('D', lower=1/60, upper=6)
        k_off = bebi103.pm.Jeffreys('k_off', lower=1/60, upper=6)
        sd = bebi103.pm.Jeffreys('sd', lower=1/60, upper=6)
        ff = pm.Uniform('ff', lower=0, upper=1)
        fb = pm.Uniform('fb', lower=0, upper=1)

        di = np.array([40 * RES, 40 * RES])

        # Theoretical value
        mu = thr_fluor(times, D, k_off, ff, fb, di)

        # Likelihood
        n_obs = pm.Normal('n_obs', mu=mu, sd=sd, observed=norm_fluor)
        
        # Draw samples
        trace = pm.sample(init='advi+adapt_diag', draws=10000, tune=2000, njobs=2)
        
        # Convert to data frame
        df_mcmc = bebi103.pm.trace_to_dataframe(trace, log_post=True)
        
        return df_mcmc, trace, norm_model
    

In [64]:
df = []
trace = []

for index, image in enumerate(norm_fluor):
    df_curr, trace_curr, _ = sample_fluor(times[FIRST_BLEACH:], image[FIRST_BLEACH:]) 
    df.append(df_curr)
    trace.append(trace_curr)

Auto-assigning NUTS sampler...
Initializing NUTS using advi+adapt_diag...
Average Loss = -336.64:  13%|█▎        | 25199/200000 [00:39<04:26, 655.70it/s]
Convergence archived at 25200
Interrupted at 25,199 [12%]: Average Loss = -141.27
Auto-assigning NUTS sampler...
Initializing NUTS using advi+adapt_diag...
Average Loss = -313.49:  12%|█▏        | 24089/200000 [00:37<04:32, 645.02it/s]
Convergence archived at 24100
Interrupted at 24,099 [12%]: Average Loss = -120.56
Auto-assigning NUTS sampler...
Initializing NUTS using advi+adapt_diag...
Average Loss = -316.57:  12%|█▏        | 24258/200000 [00:48<08:18, 352.77it/s]
Convergence archived at 24300
Interrupted at 24,299 [12%]: Average Loss = -119.61
Auto-assigning NUTS sampler...
Initializing NUTS using advi+adapt_diag...
Average Loss = -332.36:  12%|█▏        | 23851/200000 [00:47<03:30, 835.08it/s]
Convergence archived at 23900
Interrupted at 23,899 [11%]: Average Loss = -138.38
Auto-assigning NUTS sampler...
Initializing NUTS using a

In [71]:
def thr_fluor_np(t, D, k_off, ff, fb, di):
    """ Calculate the theoretical normalized fluorescence at that time using np """
    
    # Initialize product of psi functions
    psi = 1
    
    # Calculate psi
    for dim in di:
        psi *= ( dim / 2 * scipy.special.erf(dim / np.sqrt(4 * D * t)) - 
                 np.sqrt(D * t / np.pi) * (1 - np.exp(- dim ** 2 / (4 * D * t))) )
        
    # Calculate final value for I_norm(t)
    return ff * (1 - psi * fb * (4 * np.exp(-k_off * t)) / (np.prod(dim)))

plots = []
tot_vals = np.array([])

mean = []
sd = []

for index, image in enumerate(df):
    
    # Get the index of the most probable parameter set
    max_ind = image['log_posterior'].idxmax()

    D, ff, fb, k_off = image.loc[max_ind, ['D', 'ff', 'fb', 'k_off']]
    
    # Get the means
    mean.append((D, k_off))
    
    # Get the standard deviations
    sd.append((pm.stats.mc_error(image['D']), pm.stats.mc_error(image['k_off'])))
    
    if index == 0:
        tot_vals = image[['D', 'ff', 'fb', 'k_off']].values
        
    else:
        tot_vals = np.append(tot_vals, image[['D', 'ff', 'fb', 'k_off']].values, axis=0)
    
    print("""
          Image {0:s}:
          ---------
          D = {1:.3f}, ff = {2:.3f}, fb = {3:.3f}, k_off = {4:.3f}
          """.format(names[index], D, ff, fb, k_off))

    p = bokeh.plotting.figure(width=500,
                              height=400,
                              x_axis_label='Time (s)',
                              y_axis_label='Normalized mean fluorescence',
                              title=names[index])

    p.line(times[FIRST_BLEACH:], thr_fluor_np(times[FIRST_BLEACH:], D, k_off, ff, fb, np.array([40 * RES, 40 * RES])))

    p.circle(times[FIRST_BLEACH-1:], norm_fluor[index][FIRST_BLEACH-1:])
    
    plots.append(p)

bokeh.io.show(bokeh.layouts.gridplot(plots, ncols=2))


          Image A:
          ---------
          D = 0.876, ff = 0.938, fb = 0.142, k_off = 0.176
          

          Image B:
          ---------
          D = 1.381, ff = 0.979, fb = 0.148, k_off = 0.064
          

          Image C:
          ---------
          D = 0.979, ff = 0.978, fb = 0.146, k_off = 0.116
          

          Image D:
          ---------
          D = 1.064, ff = 0.918, fb = 0.142, k_off = 0.189
          

          Image E:
          ---------
          D = 0.426, ff = 0.905, fb = 0.134, k_off = 0.209
          

          Image F:
          ---------
          D = 0.514, ff = 0.914, fb = 0.135, k_off = 0.245
          

          Image G:
          ---------
          D = 0.887, ff = 0.957, fb = 0.136, k_off = 0.150
          

          Image H:
          ---------
          D = 0.223, ff = 0.900, fb = 0.128, k_off = 0.342
          


We can see from the above plots that all of the experiments yield a result similar to that predicted by the model. As a result, we can probably treat the data as independent samples and propagate error by assuming a Gaussian distribution for the independent samples and using error propagation derived in problem 3.1:

In [72]:
sigma_D = 1 / np.sum([1 / sig[0]**2 for sig in sd])
sigma_k = 1 / np.sum([1 / sig[1]**2 for sig in sd])

mu_D = sigma_D * np.sum([mean[i][0] / sd[i][0]**2 for i in range(len(mean))])
mu_k = sigma_k * np.sum([mean[i][1] / sd[i][1]**2 for i in range(len(mean))])

Printing our result:

In [79]:
print("""
      D = {0:.3f} ± {1:.3f}
      K = {2:.3f} ± {3:.3f}""".format(mu_D, sigma_D, mu_k, sigma_k))


      D = 0.772 ± 0.000
      K = 0.167 ± 0.000


Therefore, our final preducted median value of $D$ is 0.772, and $k_{off}$ is 0.167. Comparing the $D$ values to the paper, it looks on the low end, but this might change due to method and how we defined our ROIs, a testament to different conclusions based on small changes in data analysis! However, just from our plot, we can see that the fit does seem appropriate for the data, and the total standard deviation of the two were on the order of 10e-6. 