# Chapter 3

Sampling from the posterior - we have computers, why do we need a closed form for the posterior distribution when we can have a massive numpy array instead?

In [None]:
import arviz
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats
import seaborn as sns

import pybayes

sns.set_style("white") 

## Sampling from the grid-approximate posterior

In [None]:
# Grid-approximate the posterior for the chapter 2 example:
# 6 successes observed in 9 attempts.
posterior = pybayes.utils.grid_approximate_binomial(
    n=9, k=6, grid_size=1000, prior=None, plot=False
)

In [None]:
# Plot the grid-approximate posterior: column 0 supplies the x values and
# column 1 the y values.
pybayes.utils.plot_nicely(x_vals=posterior[:,0], y_vals=posterior[:,1])

In [None]:
# Draw 10,000 values (with replacement) from column 0 of the grid, using
# column 1 as the probability of picking each value.
samples = np.random.choice(
    a=posterior[:, 0],
    size=int(1e4),
    replace=True,
    p=posterior[:, 1],
)

In [None]:
# Visualise the draws: the top panel shows each sample in draw order, the
# bottom panel shows how the sampled values are distributed.
fig, axes = plt.subplots(nrows=2, figsize=(5, 10))

sns.scatterplot(x=np.arange(len(samples)), y=samples, alpha=0.2, ax=axes[0])
axes[0].set(ylim=(0, 1), xlabel='Sequence number', ylabel='Sampled p')

sns.histplot(x=samples, element='poly', fill=False, ax=axes[1])
axes[1].set(xlim=(0, 1), xlabel='Sampled p', ylabel='Frequency density')

plt.show()


Once we have samples from the posterior, we can do things we actually care about, such as point estimates and compatibility intervals (McElreath dislikes the phrase 'confidence interval').

### Intervals

In [None]:
# E.g. 1: what is the probability that p < 0.5 given our data
# and binomial model?

# From the grid approximation: add up column 1 (posterior mass) over the
# rows whose column 0 (p value) lies below 0.5.
grid_approx_p_of_half = posterior[posterior[:, 0] < 0.5, 1].sum()
print(f'grid result: {grid_approx_p_of_half: .3f}')

# From the samples (easier in general): the fraction of draws below 0.5.
sampled_p_of_half = (samples < 0.5).mean()
print(f'sampled result: {sampled_p_of_half: .3f}')


In [None]:
# E.g. 2: what is the 10%-90% interval for our posterior p? Both bounds
# come straight from the sample quantiles, requested in a single call.

print('10%\t90%')
print('\t'.join(f'{bound:.2f}' for bound in np.quantile(samples, [0.1, 0.9])))

NB the percentile intervals are nice summaries of the distribution, unless the distro is highly skewed, but they're not ideal for inference.

### Point estimates

Note - you don't really need one. The Bayesian parameter estimate is the distribution you just computed; anything else discards useful information. But if you want one, one option is the maximum a posteriori (MAP) estimate, the mode of the distribution.

For the case where we have three successes in three trials, the distribution of p looks like:

In [None]:
# Posterior for 3 successes in 3 trials (plotted by the helper via plot=True).
posterior_three_successes = pybayes.utils.grid_approximate_binomial(
    n=3, k=3, grid_size=1000, prior=None, plot=True
)

# Draw 10,000 values from column 0, weighted by column 1.
samples_three_successes = np.random.choice(
    a=posterior_three_successes[:, 0],
    size=int(1e4),
    replace=True,
    p=posterior_three_successes[:, 1],
)

In [None]:
# MAP from the grid approximation: the column-0 value on the row where
# column 1 (the posterior weight) is largest.
mode = x_for_max_y = posterior_three_successes[np.argmax(posterior_three_successes[:, 1]), 0]
print(f'MAP from grid: {mode:.2f}')

# MAP from the samples: the grid value that was drawn most often.
# This is a noisier estimate than the grid one, because neighbouring grid
# points with similar posterior weight are drawn a similar number of times.
drawn_values, draw_counts = np.unique(samples_three_successes, return_counts=True)
sampled_mode = drawn_values[np.argmax(draw_counts)]
print(f'MAP from samples: {sampled_mode:.2f}')

### Sampling to simulate prediction

We can use our model to generate dummy data, and use that to influence model design, checking, validation, forecasting, etc.


For our binomial model from before, every possible value of p implies a distribution of outcomes. By combining all these distributions, weighted by the posterior probability of each p, we get a *posterior predictive distribution*. This is a more honest way of showing your predicted outcomes, because you include your uncertainty in your parameters (vs. e.g. picking the most probable value of p in our globe model and showing the implied distribution from that value). As a result of incorporating this uncertainty, the posterior predictive distribution is normally more spread out.

In [None]:
# Do this as follows.
# To simulate the predicted observations for a single value of p in our binomial model, we do:
w_predictions = np.random.binomial(n=9, p=0.6, size=10_000) # 10,000 samples of the number of successes in 9 trials, when p=0.6

_ = pybayes.utils.hist(w_predictions)

In [None]:
# To instead propagate the uncertainty in p, pass the posterior samples (drawn
# from the grid approximation earlier) as p: each simulated count of successes
# in 9 trials then uses its own sampled p. `size` has to be compatible with the
# number of samples.
w_posterior_predictions = np.random.binomial(n=9, p=samples, size=10_000)

_ = pybayes.utils.hist(w_posterior_predictions)

In the book, he uses these implied predictions to inspect the observed data (which was WLWWWLWLW) more stringently, by looking at the longest run length and the number of switches. You can run the simulation and plot the expected distribution under our model, and then see how our observation squares with this (longest run 3, number of switches 6). We will do that here, just for fun, and because it seems useful.

In [None]:
# Display the posterior samples of p drawn from the grid approximation.
samples

In [None]:
# Simulate one 9-toss sequence per posterior sample: each toss is 'W' with
# probability prob and 'L' otherwise.
sequences = [
    np.random.choice(['W', 'L'], size=9, p=[prob, 1 - prob])
    for prob in samples
]
sequences[:5]

In [None]:
def get_longest_run(sequence):
    """Return the length of the longest run of equal consecutive items.

    Args:
        sequence: Any iterable of comparable items (e.g. a string, list or
            1-D array of 'W'/'L' outcomes).

    Returns:
        The longest run length. A non-empty sequence with no repeated
        neighbours has a longest run of 1; an empty sequence gives 0.
    """
    from itertools import groupby

    # groupby yields one group per maximal run of equal adjacent items, so
    # the longest run is simply the largest group.
    return max(
        (sum(1 for _ in run) for _, run in groupby(sequence)),
        default=0,
    )

def get_num_switches(sequence):
    """Return how many times consecutive items in `sequence` differ.

    Args:
        sequence: A sliceable sequence of comparable items (e.g. a string,
            list or 1-D array of 'W'/'L' outcomes).

    Returns:
        The number of positions whose item differs from the one before it.
        Empty and single-item sequences contain no switches, so give 0.
    """
    # Pair every item with its successor; zip stops at the shorter input,
    # so empty and one-item sequences produce no pairs.
    return sum(
        1
        for previous, current in zip(sequence, sequence[1:])
        if current != previous
    )


In [None]:
# Summarise every simulated sequence by its longest run and its switch count.
runs = list(map(get_longest_run, sequences))
switches = list(map(get_num_switches, sequences))

In [None]:
# Distribution of simulated longest-run lengths (the observed sequence had a
# longest run of 3).
_ = pybayes.utils.hist(runs)

In [None]:
# Distribution of simulated switch counts (the observed sequence had 6
# switches).
_ = pybayes.utils.hist(switches)

Note above that of our observed values (3 and 6), the 6 is a bit suspicious, and is associated with negative correlation between one result and the next.

## Solutions to exercises (spoilers)

novice, avert thy gaze

We use `samples` from before - likelihood taken from 9 trials, 6 successes, uniform prior, posterior grid-approximated then sampled.


In [None]:
# 3E1: P(p < 0.2 | D) - the fraction of posterior samples below 0.2
np.mean(samples < 0.2)

In [None]:
# 3E2: P(p > 0.8 | D) - the fraction of posterior samples above 0.8
np.mean(samples > 0.8)

In [None]:
# 3E3: P(0.2 < p < 0.8 | D) - the fraction of samples strictly inside (0.2, 0.8)
np.mean((samples > 0.2) & (samples < 0.8))

In [None]:
# 3E4: for what x does P(p < x | D) = 0.2? That is the 20% sample quantile.
np.quantile(samples, q=0.2)

In [None]:
# 3E5: for what x does P(p > x | D) = 0.2? 20% of the mass above x means
# 80% below it, i.e. the 80% sample quantile.
np.quantile(samples, q=0.8)

In [None]:
# 3E6: which values of p bound the narrowest interval containing 66% of the posterior?
# Really I should implement the HPDI myself, but I shan't - arviz.hdi is used instead.

arviz.hdi(samples, hdi_prob=0.66)

In [None]:
# 3E7: the 66% percentile interval instead, i.e. with equal posterior
# probability ((1 - 0.66) / 2) left below and above the interval.

np.quantile(samples, q=[(1 - 0.66) / 2, 1 - (1 - 0.66) / 2])

In [None]:
# 3M1: if we have 8 successes in 15 trials, what would the posterior be?
new_posterior = pybayes.utils.grid_approximate_binomial(
    n=15, k=8, grid_size=1000, prior=None, plot=False
)
# Compare with the previous (6 of 9) posterior: the peak is shifted left.
pybayes.utils.plot_nicely(x_vals=new_posterior[:, 0], y_vals=new_posterior[:, 1])

In [None]:
# 3M2: draw 10,000 samples from the 8-of-15 posterior; the 90% HPDI for p
# is computed from them in the next cell.
new_samples = np.random.choice(
    a=new_posterior[:, 0],
    size=int(1e4),
    replace=True,
    p=new_posterior[:, 1],
)
pybayes.utils.hist(new_samples)

In [None]:
# 90% HPDI of p, computed from the samples of the 8-of-15 posterior.
arviz.hdi(new_samples, hdi_prob=0.9)

In [None]:
# 3M3: generate samples from the posterior predictive distribution - one
# simulated count of successes in 15 trials per posterior sample of p.
new_posterior_predictive_samples = np.random.binomial(
    n=15, p=new_samples, size=10_000
)
sns.histplot(data=new_posterior_predictive_samples, discrete=True)

In [None]:
# What is the probability of exactly 8 successes? Estimated as the fraction
# of posterior predictive draws equal to 8.
np.mean(new_posterior_predictive_samples == 8)

In [None]:
# Display the grid-approximate posterior array for 8 successes in 15 trials.
new_posterior

In [None]:
# Display the samples drawn from the 8-of-15 posterior.
new_samples

In [None]:
# 3M4: what is the probability of 6 waters in 9 tosses, using the posterior
# from the 8-of-15 data?
ppd_9_tosses = np.random.binomial(n=9, p=new_samples, size=10_000)
np.mean(ppd_9_tosses == 6)

In [None]:
# 90% HPDI bounds of the 8-of-15 posterior as one comma-separated string.
# str.join only accepts strings, so each float bound is formatted first.
",".join(f"{bound:.2f}" for bound in arviz.hdi(new_samples, hdi_prob=0.9))

In [None]:
# 3M5 - as in 3M1 onwards, but now with a step-function prior: 0 for grid
# values below 0.5 and a constant (2) from 0.5 upwards.
grid_size = 1000
p_grid = np.linspace(0, 1, grid_size)
prior = np.where(p_grid < 0.5, 0, 2)

# Grid-approximate the posterior under the step prior and plot it.
step_posterior = pybayes.utils.grid_approximate_binomial(
    n=15, k=8, grid_size=grid_size, prior=prior, plot=False
)
pybayes.utils.plot_nicely(x_vals=step_posterior[:, 0], y_vals=step_posterior[:, 1])

# Draw some samples and report the 90% HPDI.
step_samples = np.random.choice(
    a=step_posterior[:, 0],
    size=int(1e4),
    replace=True,
    p=step_posterior[:, 1],
)
hpdi = arviz.hdi(step_samples, hdi_prob=0.9)
print(f'HPDI: {hpdi[0]:.2f} - {hpdi[1]:.2f}')

# Build the posterior predictive distribution for 15 tosses, find p(8 of 15).
step_ppd = np.random.binomial(n=15, p=step_samples, size=10_000)
sns.histplot(data=step_ppd, discrete=True)
plt.show()
print('p(8 of 15 under step prior)', np.mean(step_ppd == 8))

# Find p(6 of 9) under this 8/15 posterior.
step_ppd_9_tosses = np.random.binomial(n=9, p=step_samples, size=10_000)
print('p(6 of 9 under step prior)', np.mean(step_ppd_9_tosses == 6))

In [None]:
# Recompute the 8-of-15 posterior with the same arguments as in 3M1.
new_posterior = pybayes.utils.grid_approximate_binomial(
    n=15, k=8, grid_size=1000, prior=None, plot=False
)


In [None]:
# 3M6: I want the width of the HPDI for my posterior to be 0.05 wide. How many times do I need to toss the globe for this?
# NB - not sure about this one
def hdpi_width(num_trials, true_p=0.7, grid_size=100, hdi_prob=0.99):
    """Simulate one experiment of `num_trials` tosses and return its HPDI width.

    The number of successes is drawn from a binomial with probability
    `true_p`, the posterior is grid-approximated from that outcome, 10,000
    samples are drawn from it, and the width of the `hdi_prob` interval
    reported by `arviz.hdi` is returned. The result is random: repeated calls
    with the same arguments give different widths.

    Args:
        num_trials: Number of tosses to simulate. Non-integer values are
            truncated to an integer once, so the simulated data and the
            posterior always use the same n.
        true_p: Success probability assumed when simulating the data.
        grid_size: Number of grid points for the posterior approximation.
            NOTE(review): the default of 100 puts grid points about 0.01
            apart, which is coarse when looking for a width of 0.05 -
            consider a finer grid.
        hdi_prob: Posterior mass the interval must contain.

    Returns:
        Upper bound minus lower bound of the interval.
    """
    num_trials = int(num_trials)
    # assume k generated from a binomial with p=true_p.
    k = np.random.binomial(n=num_trials, p=true_p)
    posterior = pybayes.utils.grid_approximate_binomial(n=num_trials,
                                                    k=k,
                                                    grid_size=grid_size,
                                                    prior=None,
                                                    plot=False)
    # Column 0 holds the values to draw, column 1 their probabilities.
    samples = np.random.choice(posterior[:, 0],
                               size=int(1e4),
                               p=posterior[:, 1],
                               replace=True)
    hpdi = arviz.hdi(samples, hdi_prob=hdi_prob)
    return hpdi[1] - hpdi[0]

In [None]:
def mean_width(num_trials, num_repeats=100):
    """Average `hdpi_width(num_trials)` over `num_repeats` separate calls.

    Args:
        num_trials: Passed unchanged to every `hdpi_width` call.
        num_repeats: How many widths to collect before averaging.

    Returns:
        `np.mean` of the collected widths.
    """
    widths = []
    for _ in range(num_repeats):
        widths.append(hdpi_width(num_trials))
    return np.mean(widths)

In [None]:
# 100 trial counts spread evenly between 1 and 5000. A number of tosses must
# be a whole number, so convert to integers here; otherwise the fractional
# values are truncated downstream and the plotted x values no longer match
# the n that was actually simulated.
trials = np.linspace(1, 5000, 100).astype(int)

In [None]:
# Mean simulated HPDI width for every trial count (slow: each entry repeats
# the simulation num_repeats times).
widths = list(map(mean_width, trials))

In [None]:
# Plot the mean HPDI width against the number of trials.
pybayes.utils.plot_nicely(x_vals=trials, y_vals=widths, xlabel='num_trials', ylabel='hpdi_width')

In [None]:
# Same curve on a log y axis, with the 0.05 target width marked as a dashed
# red line.
fig, ax = plt.subplots()
ax.plot(trials, widths)
ax.axhline(y=0.05, color='r', linestyle='--')
ax.set(yscale='log')

In [None]:
# 3H1. Birth data, stored as boolean arrays: male = 1 (True), female = 0 (False).
# birth1 holds the sex of the first-born child and birth2 the sex of the
# second-born child; the two arrays are paired position by position.
birth1 = np.asarray((1,0,0,0,1,1,0,1,0,1,0,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,
0,0,0,1,1,1,0,1,0,1,1,1,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,
1,1,0,1,0,0,1,0,0,0,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0,0,1,0,1,1,0,
1,0,1,1,1,0,1,1,1,1), dtype=bool)
birth2 = np.asarray((0,1,0,1,0,1,1,1,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,
1,1,1,0,1,1,1,0,1,0,0,1,1,1,1,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,0,1,1,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,1,1,
0,0,0,1,1,1,0,0,0,0), dtype=bool)


In [None]:
# 2x2 contingency table: counts[first][second] is the number of positions
# where birth1 equals `first` and birth2 equals `second`.
counts = [
    [sum((birth1 == first) & (birth2 == second)) for second in (0, 1)]
    for first in (0, 1)
]

In [None]:
# Annotated heatmap of the 2x2 table: rows index the first birth, columns the
# second birth (0 = female, 1 = male).
sns.heatmap(counts, annot=True)

In [None]:
# Total number of boys across first and second births.
birth1.sum() + birth2.sum()

In [None]:
# What is the grid-approximate posterior for a birth being a boy? Assume a
# uniform prior.

# Binomial with observed N = 200, k = 111.
grid_size = 1000
p_grid = np.linspace(0, 1, grid_size)
boy_posterior = pybayes.utils.grid_approximate_binomial(
    n=200, k=111, grid_size=grid_size, prior=None, plot=True
)



In [None]:
# Column-0 value on the row where column 1 (the posterior weight) peaks.
modal_p = boy_posterior[boy_posterior[:, 1].argmax(), 0]
print('modal probability of a boy', round(modal_p, 2))

In [None]:
# 3H2: draw some samples, then estimate the 50%, 89% and 97% HPDIs.
boy_samples = np.random.choice(
    a=boy_posterior[:, 0],
    size=int(1e4),
    replace=True,
    p=boy_posterior[:, 1],
)

for hpdi_prob in (0.5, 0.89, 0.97):
    hpdi = arviz.hdi(boy_samples, hdi_prob=hpdi_prob)
    print(f'HPDI for {hpdi_prob}:\t{hpdi[0]:.2f} - {hpdi[1]:.2f}')

In [None]:
# 3H3. Simulate 10,000 replicates of 200 births. Compare the distribution to the actual count.
# NB: no `size` argument is needed here - passing the array of sampled p values
# yields one simulated count per sample.
simulated_lads = np.random.binomial(n=200, p=boy_samples)


In [None]:
# Histogram of simulated boy counts, with the observed count (111) in red.
sns.histplot(data=simulated_lads, discrete=True)
plt.xlabel('Lads')
plt.axvline(x=111, color='red')
plt.show()

In [None]:
# 3H4: now compare 10,000 simulations of 100 firstborns to the observed data.
# How's it looking?
firstborns = birth1.sum()

# Reuse the samples of p from the big (all 200 births) model.
simulated_firstborns = np.random.binomial(n=100, p=boy_samples)

sns.histplot(data=simulated_firstborns, discrete=True)
plt.xlabel('Firstborn lads')
plt.axvline(x=firstborns, color='red')
plt.show()

In [None]:
# 3H5 - we have assumed that first and second births are independent. Check
# this by looking only at second births that followed a female first birth:
# count the first-borns who were girls, then simulate that many births.
second_births_following_females = list(birth2[birth1 == 0])

In [None]:
# Number of second births that followed a female first birth.
total_following_females = len(second_births_following_females)

In [None]:
# Number of those second births that were boys (True counts as 1).
boys_following_females = sum(second_births_following_females)

In [None]:
# Simulate the number of boys among that many births, one count per posterior
# sample of p, and mark the observed number of boys in red.
simulated_seconds = np.random.binomial(
    n=total_following_females, p=boy_samples
)

sns.histplot(data=simulated_seconds, discrete=True)
plt.xlabel('Secondborns')
plt.axvline(x=boys_following_females, color='red')
plt.show()