In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Read the pre-processed table; the first CSV column becomes the index and
# is then converted to datetimes so the index holds timestamps, not strings.
csv_path = 'data/processed.csv'
df = pd.read_csv(csv_path, index_col=0)
df.index = pd.to_datetime(df.index)

In [None]:
# Bar chart of the raw 'sender' column against the datetime index, drawn
# through an explicit axes object rather than the implicit pyplot state.
fig = plt.figure(figsize=(12, 8))
bar_axes = fig.add_subplot(111)
bar_axes.bar(df.index, df['sender'])
bar_axes.set_title("Did the user's texting habits change over time?")
bar_axes.set_xlabel("Date")
bar_axes.set_ylabel("count of text-msgs received")
# Rotate the tick labels of the current axes (bar_axes) so they do not overlap.
plt.xticks(rotation=90)

In [None]:
import pymc3 as pm
import theano.tensor as tt

In [None]:
# Two summary numbers reused by the model cell: the number of rows and the
# average value of the 'sender' column.
sender_counts = df['sender']
n_msg = len(sender_counts)
mean_msg = sender_counts.mean()
print(mean_msg, n_msg)

In [None]:
# Switchpoint model for the 'sender' counts:
#   * lambda_1 / lambda_2 -- Poisson rates before / after the switchpoint
#   * tau                 -- index (0 .. n_msg - 1) at which the rate switches
# A fixed seed makes Restart & Run All reproduce the same trace, and hence
# the same posterior figures in the cells below.
RANDOM_SEED = 42

with pm.Model() as model:
    # Rate parameter shared by both Exponential priors. An Exponential with
    # rate alpha has mean 1/alpha, so the prior mean of each lambda equals
    # mean_msg, the sample mean of df['sender'] computed in an earlier cell.
    alpha = 1.0/mean_msg

    lambda_1 = pm.Exponential("lambda_1", alpha)
    lambda_2 = pm.Exponential("lambda_2", alpha)

    tau = pm.DiscreteUniform("tau", lower=0, upper=n_msg - 1)

    # Position of each observation; positions strictly below tau use
    # lambda_1, positions at or above tau use lambda_2.
    idx = np.arange(n_msg)
    lambda_ = pm.math.switch(tau > idx, lambda_1, lambda_2)

    observation = pm.Poisson("obs", lambda_, observed=df['sender'].values)

    step = pm.Metropolis()
    trace = pm.sample(3000, tune=500, step=step, random_seed=RANDOM_SEED)

In [None]:
# Pull the sampled values of each model variable out of the trace by name.
lambda_1_samples, lambda_2_samples, tau_samples = (
    trace[var_name] for var_name in ("lambda_1", "lambda_2", "tau")
)

In [None]:
fig = plt.figure(figsize=(12.5, 10))

# All labels containing LaTeX are raw strings: in a normal string "\l" is an
# invalid escape sequence, which Python reports with a warning.
posterior_title = r"""Posterior distributions of the variables
    $\lambda_1,\;\lambda_2,\;\tau$"""


def _plot_rate_posterior(position, samples, symbol, color, title=None):
    """Draw a normalised histogram of one rate parameter's posterior samples.

    Parameters
    ----------
    position : int
        Three-digit subplot code passed to ``plt.subplot`` (e.g. 311).
    samples : array-like
        Posterior samples to histogram.
    symbol : str
        LaTeX symbol used in the legend entry and the x-axis label.
    color : str
        Fill colour of the histogram.
    title : str, optional
        Title placed above this panel; omitted when None.

    Returns
    -------
    The axes object the histogram was drawn on.
    """
    axis = plt.subplot(position)
    # y-axis autoscaling is switched off before drawing, as in the original.
    axis.set_autoscaley_on(False)
    plt.hist(samples, histtype='stepfilled', bins=30, alpha=0.85,
             label="posterior of " + symbol, color=color, density=True)
    plt.legend(loc="upper left")
    if title is not None:
        plt.title(title)
    # NOTE(review): the 10-200 window is hard-coded, presumably picked by eye
    # for this dataset -- adjust if the sampled rates fall outside it.
    plt.xlim([10, 200])
    plt.xlabel(symbol + " value")
    return axis


# Top and middle panels: posteriors of the two Poisson rates.
ax = _plot_rate_posterior(311, lambda_1_samples, r"$\lambda_1$", "#A60628",
                          title=posterior_title)
ax = _plot_rate_posterior(312, lambda_2_samples, r"$\lambda_2$", "#7A68A6")

# Bottom panel: posterior of the switchpoint. Each sample carries weight 1/N
# so the bar heights sum to one and read as probabilities.
plt.subplot(313)
w = 1.0 / tau_samples.shape[0] * np.ones_like(tau_samples)
plt.hist(tau_samples, bins=n_msg, alpha=1,
         label=r"posterior of $\tau$",
         color="#467821", weights=w, rwidth=2.)

plt.legend(loc="upper left")
# NOTE(review): the y-limit 0.75 and the x start 35 are hard-coded,
# presumably tuned to this dataset -- confirm after re-sampling.
plt.ylim([0, .75])
plt.xlim([35, n_msg])
plt.xlabel(r"$\tau$ (in days)")
plt.ylabel("probability");

In [None]:
# Display the DataFrame's index as the cell output, to inspect the labels
# that the positional values 0 .. n_msg - 1 used by the model refer to.
df.index

In [None]:
fig = plt.figure(figsize=(12.5, 10))
panel = fig.add_subplot(111)

# tau_samples, lambda_1_samples and lambda_2_samples each hold N posterior
# samples; entry k of every array belongs to the same sample.
N = tau_samples.shape[0]
expected_texts_per_day = np.zeros(n_msg)
for day in range(n_msg):
    # True for the samples whose switchpoint lies after this day, i.e. the
    # samples in which this day is still governed by lambda_1; the remaining
    # samples place the day in the lambda_2 regime.
    ix = tau_samples > day
    rate_sum_before = lambda_1_samples[ix].sum()
    rate_sum_after = lambda_2_samples[~ix].sum()
    # Averaging the applicable rate over all N samples gives the expected
    # Poisson rate for the day, which is also the expected message count.
    expected_texts_per_day[day] = (rate_sum_before + rate_sum_after) / N

observed_counts = df['sender'].values

panel.plot(range(n_msg), expected_texts_per_day, lw=4, color="#E24A33",
           label="expected number of text-messages received")
panel.set_xlim(0, n_msg)
panel.bar(np.arange(len(observed_counts)), observed_counts,
          color="#348ABD", alpha=0.65, label="observed texts per day")
panel.set_title("Expected number of text-messages received")
panel.set_xlabel("Day")
panel.set_ylabel("Expected # text-messages")

panel.legend(loc="upper left");

In [None]:
# Count how many posterior samples landed on each candidate switchpoint.
# The bin edges are given explicitly as 0, 1, ..., n_msg so that bin i holds
# exactly the samples with tau == i. With an integer bin count NumPy instead
# divides the span between the smallest and largest sample into equal bins,
# and the indices printed below would be bin numbers rather than tau values.
hist = np.histogram(tau_samples, bins=np.arange(n_msg + 1))[0]
# Positions (= tau values) that received at least one sample ...
idx_nonzero = hist.nonzero()
print(idx_nonzero)
# ... and the number of samples at each of those positions.
hist[idx_nonzero]