# Example - 2CDE Method

*This notebook is part of smFRET burst analysis software [FRETBursts](http://tritemio.github.io/FRETBursts/).*

> **This notebook is experimental! Implementation needs more tests.** Here we implement the 2CDE method from [Tomov 2012](http://dx.doi.org/10.1016%2Fj.bpj.2011.11.4025).

# Load Data

In [None]:
# NOTE(review): the star import is presumably what brings `np`, `plt`, `plot`, `loader`,
# `bg`, `select_bursts`, `Ph_sel`, `bpl`, `alex_jointplot`, `download_file` and
# `init_notebook` into scope; none of them is imported explicitly in this notebook.
from fretbursts import *
from fretbursts.phtools import phrates
# `sns` is whatever init_notebook() returns; later cells use it for seaborn plotting calls.
sns = init_notebook()

In [None]:
# Download the example measurement into ./data (note: plain-HTTP URL).
url = 'http://files.figshare.com/2182601/0023uLRpitc_NTP_20dT_0.5GndCl.hdf5'
download_file(url, save_dir='./data')

In [None]:
# Local path of the data file; the basename is the same as the last component of `url`.
filename = "data/0023uLRpitc_NTP_20dT_0.5GndCl.hdf5"

In [None]:
# Load the measurement file into the data object `d` used by the rest of the notebook.
d = loader.photon_hdf5(filename)
#bpl.plot_alternation_hist(d)
# Alternation-period step of the loader (ALEX data); see loader.alex_apply_period.
loader.alex_apply_period(d)
# Background estimation with bg.exp_fit, time_s=20, automatic tail threshold, F_bg=1.7.
d.calc_bg(fun=bg.exp_fit, time_s=20, tail_min_us='auto', F_bg=1.7)
# Burst search with the library defaults (no arguments passed).
d.burst_search()

In [None]:
# Two-stage burst selection: first the `size` criterion (th1=20), then the
# `naa` criterion (th1=20) applied to the already size-selected bursts.
ds1 = d.select_bursts(select_bursts.size, th1=20)
ds = ds1.select_bursts(select_bursts.naa, th1=20)

# Overview plot of the selected bursts.
alex_jointplot(ds)

# Photon rates

In [None]:
# Photon timestamps: element 0 of d.ph_times_m.
# NOTE(review): presumably in clock units of d.clk_p seconds, since later cells plot
# ph*d.clk_p against a 'Time (s)' axis.
ph = d.ph_times_m[0]

In [None]:
# 100 us converted to timestamp clock units.
# NOTE(review): this value is overwritten by `tau = 1` in the next section before
# any later cell uses it.
tau = 100e-6/d.clk_p
tau

# KDE considerations

In [None]:
# Toy example: unit kernel time constant.
tau = 1

In [None]:
# Regularly spaced synthetic "timestamps": 0, 1, ..., 13.
t = np.arange(0, 14)

In [None]:
# Evaluate the KDEs on a grid 100x finer than the spacing of the first two timestamps.
tx = np.arange(0, t.max(), 0.01*(t[1] - t[0]))
r, n = phrates.kde_laplace_nph_numba(t, tau, tx)
_, nn3 = phrates.kde_laplace_nph_numba(t, 3*tau/5, tx)
r3 = phrates.kde_gaussian_numba(t, tau, tx)

# Each curve is divided by a kernel "width" so that all of them are comparable:
#   2*tau    = area of exp(-|t|/tau)
#   2.5*tau ~= sqrt(2*pi)*tau, area of exp(-t^2 / (2*tau^2))
#   10*tau and 6*tau: NOTE(review) presumably the +/-5*tau counting window of the
#   photon-count output (6*tau = 10 * 3*tau/5) -- confirm against phrates.
fig, ax = plt.subplots(figsize=(17, 4))
ax.plot(tx, r/(2*tau), label='Laplace KDE / (2 tau)')
ax.plot(tx, n/(10*tau), label='photon count / (10 tau)')
ax.plot(tx, r3/(2.5*tau), label='Gaussian KDE / (2.5 tau)')
ax.plot(tx, nn3/(6*tau), label='photon count, 3 tau/5 kernel / (6 tau)')
# Rug marks at the timestamp positions (replaces the deprecated sns.distplot call).
sns.rugplot(t, ax=ax)
ax.set_xlabel('t')
ax.legend(loc='best');

In [None]:
# Same comparison with timestamps spaced by 2 (0, 2, ..., 12).
t = np.arange(0, 14, 2)
# Evaluation grid 100x finer than the spacing of the first two timestamps.
tx = np.arange(0, t.max(), 0.01*(t[1] - t[0]))
r, n = phrates.kde_laplace_nph_numba(t, tau, tx)
_, nn3 = phrates.kde_laplace_nph_numba(t, 3*tau/5, tx)
r3 = phrates.kde_gaussian_numba(t, tau, tx)

# Each curve is divided by a kernel "width" so that all of them are comparable:
#   2*tau    = area of exp(-|t|/tau)
#   2.5*tau ~= sqrt(2*pi)*tau, area of exp(-t^2 / (2*tau^2))
#   10*tau and 6*tau: NOTE(review) presumably the +/-5*tau counting window of the
#   photon-count output (6*tau = 10 * 3*tau/5) -- confirm against phrates.
fig, ax = plt.subplots(figsize=(17, 4))
ax.plot(tx, r/(2*tau), label='Laplace KDE / (2 tau)')
ax.plot(tx, n/(10*tau), label='photon count / (10 tau)')
ax.plot(tx, r3/(2.5*tau), label='Gaussian KDE / (2.5 tau)')
ax.plot(tx, nn3/(6*tau), label='photon count, 3 tau/5 kernel / (6 tau)')
# Rug marks at the timestamp positions (replaces the deprecated sns.distplot call).
sns.rugplot(t, ax=ax)
ax.set_xlabel('t')
ax.legend(loc='best');

In [None]:
# Fixed seed so the random timestamps are the same on every run.
np.random.seed(1)
# 14 values drawn uniformly from [0, 14), then sorted in place.
t = np.random.rand(14)*14
t.sort()

In [None]:
# Evaluation grid whose step is 1/100 of the gap between the first two timestamps.
tx = np.arange(0, t.max(), 0.01*(t[1] - t[0]))
r, n = phrates.kde_laplace_nph_numba(t, tau, tx)
_, nn3 = phrates.kde_laplace_nph_numba(t, 3*tau/5, tx)
r3 = phrates.kde_gaussian_numba(t, tau, tx)

# Each curve is divided by a kernel "width" so that all of them are comparable:
#   2*tau    = area of exp(-|t|/tau)
#   2.5*tau ~= sqrt(2*pi)*tau, area of exp(-t^2 / (2*tau^2))
#   10*tau and 6*tau: NOTE(review) presumably the +/-5*tau counting window of the
#   photon-count output (6*tau = 10 * 3*tau/5) -- confirm against phrates.
fig, ax = plt.subplots(figsize=(17, 4))
ax.plot(tx, r/(2*tau), label='Laplace KDE / (2 tau)')
ax.plot(tx, n/(10*tau), label='photon count / (10 tau)')
ax.plot(tx, r3/(2.5*tau), label='Gaussian KDE / (2.5 tau)')
ax.plot(tx, nn3/(6*tau), label='photon count, 3 tau/5 kernel / (6 tau)')
# Rug marks at the timestamp positions (replaces the deprecated sns.distplot call).
sns.rugplot(t, ax=ax)
ax.set_xlabel('t')
ax.legend(loc='best');

In [None]:
# Sample the two kernel shapes on [-4*tau, 4*tau) with a step of tau/100.
tau = 1
tau2 = 2 * tau**2                      # denominator of the Gaussian exponent

xx = np.arange(-4*tau, 4*tau, tau/100)
y1 = np.exp(-np.abs(xx) / tau)         # symmetric exponential (Laplace) kernel
y2 = np.exp(-np.square(xx) / tau2)     # Gaussian kernel

In [None]:
# Overlay the two kernel shapes; the black vertical lines mark t = +/- 2*tau.
plt.plot(xx,y1, label=r'$\exp \left( - \frac{|t|}{\tau} \right)$')
plt.plot(xx, y2, label=r'$\exp \left( - \frac{t^2}{2\tau^2} \right)$')
plt.axvline(2*tau, color='k')
plt.axvline(-2*tau, color='k')
plt.xlabel('t')
plt.legend(loc='best', fontsize=22)
plt.title(r'$\tau = %d$' % tau, fontsize=22);

In [None]:
# Twice the exponential-kernel value at |t| = 5*tau.
# NOTE(review): presumably an estimate of what is lost by truncating the kernel
# at 5*tau -- confirm the intent.
np.exp(-5)*2

## KDE Observations

The Gaussian kernel gives a more accurate rate estimation with very little dependence on the position where the KDE is evaluated. On the contrary, with the symmetric exponential kernel (Laplace distribution), there is always a strong dependence on the evaluation position. In particular, when rates are estimated at the timestamp positions, the rates are systematically over-estimated (i.e. the peak is always sampled).

For the Gaussian kernel, given a $\tau$, the rate estimation will be accurate for rates higher than $1/(2\,\tau)$ counts-per-second. For lower rates, the estimation will strongly depend on where the KDE is evaluated. A similar condition can also be found for the exponential kernel, but the estimation will always be strongly dependent on the position.

# Cython

In [None]:
%load_ext Cython

In [None]:
%%cython

import numpy as np
cimport numpy as np
from libc.math cimport exp, fabs

ctypedef np.int64_t DTYPE_t


def _kde_gaussian_cy(DTYPE_t[:] timestamps, DTYPE_t tau, DTYPE_t[:] time_axis):
    """Un-normalized Gaussian kernel sum evaluated at every point of `time_axis`.

    For each evaluation time `t` the value is the sum of
    ``exp(-(t_i - t)**2 / (2 * tau**2))`` over the timestamps `t_i` inside a
    sliding window of half-width ``3 * tau`` around `t`.

    The two window indexes only ever move forward, so `timestamps` and
    `time_axis` are expected to be sorted in increasing order.

    Arguments:
        timestamps: int64 memoryview of event times.
        tau: kernel width (sigma), in the same units as `timestamps`.
        time_axis: int64 memoryview of evaluation times.

    Returns:
        float64 memoryview with one value per element of `time_axis`.
    """
    cdef np.int64_t n_stamps, hi, lo, i_eval, i_ts
    cdef np.float64_t[:] rates
    cdef np.float64_t two_tau_sq
    cdef DTYPE_t half_width, t_eval

    n_stamps = timestamps.size
    rates = np.zeros((time_axis.size,), dtype=np.float64)
    half_width = 3 * tau   # +/- 3 tau holds 99.7 % of the Gaussian
    two_tau_sq = 2 * (tau**2)

    # `lo` (inclusive) and `hi` (exclusive) delimit the timestamps in the window.
    hi = 0
    lo = 0
    for i_eval in range(time_axis.size):
        t_eval = time_axis[i_eval]
        # Extend the upper edge up to the first timestamp >= t_eval + half_width.
        while hi < n_stamps and timestamps[hi] - t_eval < half_width:
            hi += 1
        # Drop timestamps that fell more than half_width behind t_eval.
        while lo < n_stamps and t_eval - timestamps[lo] > half_width:
            lo += 1

        for i_ts in range(lo, hi):
            rates[i_eval] += exp(-((timestamps[i_ts] - t_eval)**2)/two_tau_sq)

    return rates


def _kde_laplace_cy(DTYPE_t[:] timestamps, DTYPE_t tau, DTYPE_t[:] time_axis):
    """Un-normalized symmetric-exponential (Laplace) kernel sum on `time_axis`.

    For each evaluation time `t` the value is the sum of
    ``exp(-|t_i - t| / tau)`` over the timestamps `t_i` satisfying
    ``t - 5*tau <= t_i < t + 5*tau``.

    The two window indexes only ever move forward, so `timestamps` and
    `time_axis` are expected to be sorted in increasing order.

    Arguments:
        timestamps: int64 memoryview of event times.
        tau: kernel time constant, in the same units as `timestamps`.
        time_axis: int64 memoryview of evaluation times.

    Returns:
        float64 memoryview with one value per element of `time_axis`.
    """
    cdef np.int64_t timestamps_size, ipos, ineg, it, itx
    cdef np.float64_t[:] rates
    cdef DTYPE_t tau_lim, t

    timestamps_size = timestamps.size
    rates = np.zeros((time_axis.size,), dtype=np.float64)
    tau_lim = 5 * tau   # kernel is truncated at 5 tau on each side

    ipos, ineg = 0, 0  # indexes for timestamps: window is [ineg, ipos)
    for it in range(time_axis.size):
        t = time_axis[it]
        while ipos < timestamps_size and timestamps[ipos] - t < tau_lim:
            ipos += 1
        # The bound must be checked on `ineg`, not `ipos`: otherwise the lower
        # edge stops advancing as soon as `ipos` reaches the end of the array
        # and timestamps outside the window leak into the sum.
        while ineg < timestamps_size and t - timestamps[ineg] > tau_lim:
            ineg += 1

        for itx in range(ineg, ipos):
            rates[it] += exp(-fabs(timestamps[itx] - t)/tau)

    return rates

In [None]:
def kde_gaussian_cy(timestamps, tau, time_axis=None):
    """Gaussian KDE via the Cython kernel `_kde_gaussian_cy`.

    Arguments:
        timestamps: event times, forwarded unchanged to `_kde_gaussian_cy`.
        tau: kernel width, forwarded unchanged.
        time_axis: times at which the KDE is evaluated. When None, the KDE
            is evaluated at `timestamps` themselves.

    Returns:
        Whatever `_kde_gaussian_cy` returns (one rate per evaluation time).
    """
    eval_times = timestamps if time_axis is None else time_axis
    return _kde_gaussian_cy(timestamps, tau, eval_times)

def kde_laplace_cy(timestamps, tau, time_axis=None):
    """Laplace (symmetric exponential) KDE via the Cython kernel `_kde_laplace_cy`.

    Arguments:
        timestamps: event times, forwarded unchanged to `_kde_laplace_cy`.
        tau: kernel time constant, forwarded unchanged.
        time_axis: times at which the KDE is evaluated. When None, the KDE
            is evaluated at `timestamps` themselves.

    Returns:
        Whatever `_kde_laplace_cy` returns (one rate per evaluation time).
    """
    eval_times = timestamps if time_axis is None else time_axis
    return _kde_laplace_cy(timestamps, tau, eval_times)

In [None]:
# Integer type matching the Cython signature (DTYPE_t is np.int64_t).
dtype = np.int64

In [None]:
# NOTE(review): `phf` is not used by any later cell visible in this notebook.
phf = ph.astype(dtype)

In [None]:
# Gaussian KDE at the photon timestamps, numba implementation (reference).
# NOTE(review): on a top-to-bottom run `tau` is still 1 here (set in the kernel-shape
# cell above), i.e. one timestamp clock unit.
ratesn = phrates.kde_gaussian_numba(ph, dtype(tau))

In [None]:
# Same quantity from the Cython implementation defined above.
ratesc = kde_gaussian_cy(ph, dtype(tau))

In [None]:
# The two implementations must agree.
assert np.allclose(ratesn, ratesc)

In [None]:
%timeit kde_gaussian_cy(ph, dtype(tau))

In [None]:
%timeit phrates.kde_gaussian_numba(ph, dtype(tau))

In [None]:
# Laplace KDE at the photon timestamps, numba implementation (reference).
ratesn = phrates.kde_laplace_numba(ph, dtype(tau))

In [None]:
# Same quantity from the Cython implementation defined above.
ratesc = kde_laplace_cy(ph, dtype(tau))

In [None]:
# The two implementations must agree.
assert np.allclose(ratesn, ratesc)

In [None]:
%timeit kde_laplace_cy(ph, dtype(tau))

In [None]:
# NOTE(review): unlike the other timing cells, `tau` is passed here without the
# dtype(...) cast.
%timeit phrates.kde_laplace_numba(ph, tau)

# nbKDE

In [None]:
# Display the photon timestamps array.
ph

In [None]:
# Kernel time constant used for the nbKDE checks, in timestamp clock units.
tau = 8000

In [None]:
# Laplace KDE plus a second per-timestamp output.
# NOTE(review): the second output is presumably the number of photons inside the
# kernel window -- confirm against phrates.kde_laplace_nph_numba.
ratesnn, nphn = phrates.kde_laplace_nph_numba(ph, tau)

In [None]:
ratesn = phrates.kde_laplace_numba(ph, tau)

In [None]:
# Compare the rates from the two functions (result is displayed, not asserted).
np.allclose(ratesn, ratesnn)

In [None]:
# Library nbKDE; it also returns rates and counts, cross-checked below.
nbkde, rates5, nph5 = phrates.kde_nbKDE(ph, tau)

In [None]:
# Hand-computed nbKDE from the KDE and the counts: (1 + 2/nph) * (KDE - 1).
# NOTE(review): the "- 1" presumably removes the exp(0) = 1 self-term of the photon
# at which the KDE is evaluated -- confirm against Tomov 2012.
nbkde2 = (1 + 2/nphn) * (ratesnn - 1)

In [None]:
assert np.allclose(nbkde, nbkde2)

In [None]:
assert np.all(nph5 == nphn)

In [None]:
assert np.allclose(ratesn, rates5)

# FRET-2CDE function

In [None]:
# Kernel time constant for FRET-2CDE: 50 us, converted to clock units with d.clk_p.
tau_s = 50e-6
tau = tau_s/d.clk_p
tau

In [None]:
# Timestamps of the two photon selections used below (Dex='Dem' and Dex='Aem').
phd = d.get_ph_times(ph_sel=Ph_sel(Dex='Dem'))
pha = d.get_ph_times(ph_sel=Ph_sel(Dex='Aem'))

# Masks for the same two selections; later cells use them to index arrays that
# are aligned with `ph` and to count photons per burst.
mask_d = d.get_ph_mask(ph_sel=Ph_sel(Dex='Dem'))
mask_a = d.get_ph_mask(ph_sel=Ph_sel(Dex='Aem'))

# Bursts (element 0 of ds.mburst) of the size/naa-selected data `ds`.
bursts = ds.mburst[0]

### KDEs, same size as "phd"

In [None]:
# KDE of the `pha` timestamps evaluated at the `phd` timestamps.
KDE_ADi = phrates.kde_laplace_numba(pha, tau, time_axis=phd)
# nbKDE of `phd` at its own timestamps; the two extra return values are discarded.
nbKDE_DDi, _, _ = phrates.kde_nbKDE(phd, tau)

### KDEs, same size as "pha"

In [None]:
# KDE of the `phd` timestamps evaluated at the `pha` timestamps.
KDE_DAi = phrates.kde_laplace_numba(phd, tau, time_axis=pha)
# nbKDE of `pha` at its own timestamps; the two extra return values are discarded.
nbKDE_AAi, _, _ = phrates.kde_nbKDE(pha, tau)

### KDEs, same size as "ph"

In [None]:
# KDEs (and second per-point outputs) evaluated at every timestamp in `ph`:
# all photons, the `phd` selection and the `pha` selection respectively.
KDE_TTi, nph_TTi = phrates.kde_laplace_nph_numba(ph, tau)
KDE_DTi, nph_DTi = phrates.kde_laplace_nph_numba(phd, tau, time_axis=ph)
KDE_ATi, nph_ATi = phrates.kde_laplace_nph_numba(pha, tau, time_axis=ph)

# Same (1 + 2/nph) * (KDE - 1) expression as in the nbKDE section above.
# NOTE(review): it is only compared with the library nbKDE at the masked positions
# (next two cells); the values at the other positions of `ph` are not checked.
nbKDE_DTi = (1 + 2/nph_DTi) * (KDE_DTi - 1)
nbKDE_ATi = (1 + 2/nph_ATi) * (KDE_ATi - 1)

In [None]:
# nbKDE computed on `ph` and masked with mask_d vs nbKDE computed directly on `phd`
# (result is displayed, not asserted).
np.allclose(nbKDE_DTi[mask_d], nbKDE_DDi)

In [None]:
# Same comparison for the `pha` selection.
np.allclose(nbKDE_ATi[mask_a], nbKDE_AAi)

## KDEs burst by burst

In [None]:
# Inspect the first burst; the loop in the next cell rebinds `burst`.
burst = bursts[0]
burst

In [None]:
# FRET-2CDE estimator, one value per burst (2CDE method, Tomov 2012):
#     FRET_2CDE = 110 - 100 * (ED + EA)
# ED is the mean of KDE_A / (KDE_A + nbKDE_D) over the burst photons selected by mask_d,
# EA is the mean of KDE_D / (KDE_D + nbKDE_A) over the burst photons selected by mask_a.
FRET_2CDE = []
ED, EA = [], []
for burst in bursts:
    # istop is treated as an inclusive index, hence the +1.
    burst_slice = slice(int(burst.istart), int(burst.istop) + 1)
    # Slice the channel masks once per burst and reuse them below.
    burst_mask_d = mask_d[burst_slice]
    burst_mask_a = mask_a[burst_slice]

    kde_adi = KDE_ATi[burst_slice][burst_mask_d]
    nbkde_ddi = nbKDE_DTi[burst_slice][burst_mask_d]
    N_chd = burst_mask_d.sum()
    assert N_chd == kde_adi.size
    # A burst with N_chd == 0 produces a non-finite value here; such bursts are
    # filtered out later through np.isfinite(FRET_2CDE).
    ED.append(np.sum(kde_adi / (kde_adi + nbkde_ddi)) / N_chd)

    kde_dai = KDE_DTi[burst_slice][burst_mask_a]
    nbkde_aai = nbKDE_ATi[burst_slice][burst_mask_a]
    N_cha = burst_mask_a.sum()
    assert N_cha == kde_dai.size
    EA.append(np.sum(kde_dai / (kde_dai + nbkde_aai)) / N_cha)

    FRET_2CDE.append(110 - 100*(ED[-1] + EA[-1]))
FRET_2CDE = np.array(FRET_2CDE)

In [None]:
# Sanity check: there should be one FRET-2CDE value per burst.
len(FRET_2CDE), bursts.num_bursts

In [None]:
# Quick scatter of E versus FRET-2CDE for all bursts.
plt.plot(ds.E[0], FRET_2CDE, 'o', alpha=0.05)

In [None]:
# Mask of bursts with a finite FRET-2CDE value, used to filter the plots below.
valid = np.isfinite(FRET_2CDE)

In [None]:
# 2-D kernel-density plot of E vs FRET-2CDE, finite values only.
# NOTE(review): `data2`, `shade` and `shade_lowest` are legacy seaborn kdeplot
# arguments; newer seaborn releases use x/y, fill and thresh -- confirm against
# the installed seaborn version.
fig = plt.figure(figsize=(6,6))
sns.kdeplot(ds.E[0][valid], data2=FRET_2CDE[valid], cmap='viridis', shade=True, shade_lowest=False)
plt.xlabel('E')
plt.ylabel('FRET-2CDE')

In [None]:
# Same data as a hexbin histogram; mincnt=1 leaves empty bins blank.
fig = plt.figure(figsize=(6,6))
plt.hexbin(ds.E[0][valid], FRET_2CDE[valid], gridsize=50, cmap='Spectral_r', mincnt=1)
plt.xlabel('E')
plt.ylabel('FRET-2CDE')

In [None]:
# Show the 12-color 'Spectral_r' palette.
sns.palplot(sns.color_palette('Spectral_r', 12))

In [None]:
# First color of that palette; used for the marginal histograms of the joint plot below.
blue = sns.color_palette('Spectral_r', 12)[0]

In [None]:
# Hexbin joint distribution of E vs FRET-2CDE with marginal histograms.
# Only bursts with a finite FRET-2CDE are plotted (same `valid` filter as the
# kdeplot/hexbin cells above); x/y are passed by keyword, which works on both
# old and new seaborn releases.
g = sns.jointplot(x=ds.E[0][valid], y=FRET_2CDE[valid], kind='hex',
                  joint_kws={'cmap': 'Spectral_r', 'mincnt': 1, 'gridsize': 50},
                  marginal_kws={'color': blue,})
g.ax_joint.set_xlabel('E')
g.ax_joint.set_ylabel('FRET-2CDE');

In [None]:
# KDE traces at the first 1000 photons: KDE_DTi plotted upwards (green) and
# KDE_ATi mirrored downwards (red).
# NOTE(review): the traces are raw kernel sums (not divided by the kernel area as
# in the later rate plots), so the 'kcps' unit in the label is nominal.
plt.plot(ph[:1000]*d.clk_p, -KDE_ATi[:1000], color=bpl.red, alpha=0.6)
plt.plot(ph[:1000]*d.clk_p,  KDE_DTi[:1000], color=bpl.green, alpha=0.6)
plt.xlabel('Time (s)')
# Was a second plt.xlabel call, which overwrote 'Time (s)' and left the y axis unlabeled.
plt.ylabel('Rate (kcps)');

In [None]:
# Gaussian-kernel counterpart of KDE_TTi: same timestamps `ph`, same `tau`.
KDE_TTig = phrates.kde_gaussian_numba(ph, tau)

In [None]:
# Rates at the first 500 photon timestamps, Laplace vs Gaussian kernel.
s = slice(0, 500)
fig, ax = plt.subplots(figsize=(16, 3.5))
# Dividing a kernel sum by the kernel area in ms gives counts per ms, i.e. kcps.
#   area of exp(-|t|/tau)            = 2 * tau
#   area of exp(-t^2 / (2 * tau^2))  = sqrt(2*pi) * tau  (~2.5 * tau, as used in the
#   toy plots above). The Gaussian curve was previously divided by 2 * tau.
laplace_area_ms = 2 * (tau_s*1e3)
gauss_area_ms = np.sqrt(2*np.pi) * (tau_s*1e3)
ax.plot(ph[s]*d.clk_p, KDE_TTi[s]/laplace_area_ms, 'o', ms=4, alpha=0.6,
        label='Laplace kernel')
ax.plot(ph[s]*d.clk_p, KDE_TTig[s]/gauss_area_ms, 'o', ms=4, alpha=0.6,
        label='Gaussian kernel')
ax.set_ylabel('Rate (kcps)')
ax.set_xlabel('Time (s)')
ax.legend(loc='best');

In [None]:
# Regular evaluation grid: 0 to 0.1 s in steps of 0.1 ms ...
t = np.arange(0, 0.1, 0.1e-3)
# ... converted from seconds to timestamp clock units.
t_clk = t/d.clk_p
# Laplace and Gaussian KDEs of all photons evaluated on that grid.
kde_rates = phrates.kde_laplace_numba(ph, tau, time_axis=t_clk)
kde_ratesg = phrates.kde_gaussian_numba(ph, tau, time_axis=t_clk)

In [None]:
# Rates on the regular time grid, Laplace vs Gaussian kernel.
fig, ax = plt.subplots(figsize=(16, 3.5))
# Dividing a kernel sum by the kernel area in ms gives counts per ms, i.e. kcps.
#   area of exp(-|t|/tau)            = 2 * tau
#   area of exp(-t^2 / (2 * tau^2))  = sqrt(2*pi) * tau  (~2.5 * tau, as used in the
#   toy plots above). The Gaussian curve was previously divided by 2 * tau.
laplace_area_ms = 2 * (tau_s*1e3)
gauss_area_ms = np.sqrt(2*np.pi) * (tau_s*1e3)
ax.plot(t, kde_rates/laplace_area_ms, alpha=0.6, label='Laplace kernel')
ax.plot(t, kde_ratesg/gauss_area_ms, alpha=0.6, label='Gaussian kernel')
ax.set_ylabel('Rate (kcps)')
ax.set_xlabel('Time (s)')
ax.legend(loc='best');