## Simple Bayesian inference.

Consider a jar containing three tokens, either:
 * Two red tokens and one black one
 * One red token and two black ones.


We'll represent the jar as a string, either `RBB` or `RRB`

In [3]:
import random
import tabulate
from fractions import Fraction

from plotly import graph_objects as go
from itertools import islice

## The components of Bayes' theorem

$$ 
P(E|H)
$$

In [4]:
def p_e_given_h(evidence: str, hypothesis: str) -> Fraction:
    """Return P(E|H): the chance of drawing `evidence` from the jar `hypothesis`.

    The jar is a string with one character per token (e.g. "RBB"), so the
    likelihood is the number of occurrences of `evidence` in `hypothesis`
    divided by the length of `hypothesis`.

    Raises:
        ZeroDivisionError: if `hypothesis` is an empty string.
    """
    matching_tokens = hypothesis.count(evidence)
    total_tokens = len(hypothesis)
    return Fraction(matching_tokens, total_tokens)

In [5]:
# P(E|H): chance of drawing a red token when the jar is hypothesised to be RBB.
p_e_given_h("R", "RBB")

Fraction(1, 3)

"If I hypothesize there is one red token, it means I believe the probability of selecting a red token is 1-in-3"

In [6]:
# P(E|H): chance of drawing a red token when the jar is hypothesised to be RRB.
p_e_given_h("R", "RRB")

Fraction(2, 3)

In [7]:
# A bare string is the last expression of this cell, so the notebook echoes it as output.
"If I hypothesize there are two red tokens, it means I believe the probability of selecting a red token is 2-in-3 "

'If I hypothesize there are two red tokens, it means I believe the probability of selecting a red token is 2-in-3 '


$$
P(E) = P(E|H) P(H) + P(E| \neg H ) P(\neg H)
$$

(i.e. the probability of observing evidence `E` is the sum of the probabilities of observing it under each possible hypothesis, each weighted by the prior probability of that hypothesis)

(Note that this is actually a function of our priors.)

In [8]:
def p_e(evidence, priors):
    """
    Return P(E): my current belief about the likelihood of `evidence`
    occurring at all.

    `priors` maps each hypothesis to its prior probability; the values must
    sum to exactly 1. The result is the likelihood of the evidence under each
    hypothesis, weighted by that hypothesis's prior and summed over all of them.
    """
    prior_values = priors.values()
    assert sum(prior_values) == 1, prior_values

    weighted_likelihoods = (
        p_e_given_h(evidence, hypothesis) * prior
        for hypothesis, prior in priors.items()
    )
    return sum(weighted_likelihoods)

In [9]:
# Priors: 10% belief that the jar is RRB, 90% belief that it is RBB.
p_e("R", priors={"RRB": Fraction(1, 10), "RBB": Fraction(9, 10)})

Fraction(11, 30)

"I believe the probability of selecting a red token is close to 1-in-3"

In [10]:
# Same priors as the previous cell, now asking about the other token ("B").
p_e("B", priors={"RRB": Fraction(1, 10), "RBB": Fraction(9, 10)})

Fraction(19, 30)

"I believe the probability of selecting a black token is close to 2-in-3"

## Inference 

$$
P(H|E) = \frac{P(E|H)P(H)}{P(E)}
$$

In [11]:
def infer(evidence: str, priors: dict[str, Fraction]) -> dict[str, Fraction]:
    """Apply Bayes' formula to a set of priors and a piece of evidence.

    Computes P(H|E) = P(E|H) * P(H) / P(E) for every hypothesis in `priors`.

    Args:
        evidence: the observed token, e.g. "R".
        priors: mapping of hypothesis -> P(H); the values must sum to exactly 1.

    Returns:
        A new mapping of hypothesis -> P(H|E). `priors` itself is not modified.

    Raises:
        AssertionError: if the priors do not sum to 1.
        ZeroDivisionError: if P(E) is zero, i.e. the evidence cannot occur
            under any hypothesis that has a non-zero prior.
    """
    assert sum(priors.values()) == 1
    # P(E) does not depend on the hypothesis being updated, so evaluate it
    # once instead of once per entry of the comprehension below.
    p_evidence = p_e(evidence, priors)
    return {
        hypothesis: Fraction(p_e_given_h(evidence, hypothesis) * p_h, p_evidence)
        for hypothesis, p_h in priors.items()
    }

In [12]:
def inference(tokens, priors, hypothesis):
    """Yield the evolving belief in `hypothesis` as tokens are drawn from the jar.

    The first item reports the starting prior with an empty `evidence` string.
    Each later item draws one token at random from `tokens`, feeds it to
    `infer`, and reports the updated belief under the same "prior" key.
    The generator never finishes on its own, so callers must bound it
    (for example with `itertools.islice`).
    """
    beliefs = priors
    drawn = ''
    while True:
        yield {"evidence": drawn, "prior": beliefs[hypothesis]}
        drawn = random.choice(tokens)
        beliefs = infer(drawn, beliefs)

In [13]:
def plot(output, hypothesis, tokens):
    """Chart the belief in `hypothesis` after each step as a stepped area plot.

    `output` is an iterable of rows that each carry a "prior" entry;
    `tokens` is the jar's actual contents and is only used in the title,
    which states whether the hypothesis matches it. The figure is returned,
    not shown.
    """
    beliefs = [float(step["prior"]) for step in output]

    gradient = dict(
        type="vertical",
        colorscale=[(0.0, "darkblue"), (0.5, "royalblue"), (1.0, "cyan")],
    )
    trace = go.Scatter(
        y=beliefs,
        line={"shape": "hv"},
        fill="tozeroy",
        fillgradient=gradient,
    )
    fig = go.Figure(data=[trace])

    title = f'Hypothesis: {hypothesis}. Actual: {tokens}. Hypothesis is {str(hypothesis==tokens).lower()}.'
    # Fix the y-axis to [0, 1] so separate figures are directly comparable.
    fig.update_layout(
        yaxis=dict(range=(0, 1)),
        margin=dict(l=10, r=10, t=30, b=10),
        height=150,
        title=title,
    )
    return fig
 

def output_table(output):
    """Render inference rows as a text table with the columns "E" and "P(H|E)"."""
    column_titles = {"evidence": "E", "prior": "P(H|E)"}
    return tabulate.tabulate(output, headers=column_titles, tablefmt="rounded_outline")

In [14]:
def simulate(hypothesis: str = "RBB", steps: int = 50):
    """Run one random experiment and plot how belief in `hypothesis` evolves.

    A jar ("RBB" or "RRB") is chosen at random, belief starts evenly split
    between the two jars, and tokens are then drawn from the chosen jar and
    fed through `inference`.

    Args:
        hypothesis: the jar whose probability is tracked. It must be one of
            the keys of the priors ("RBB" or "RRB"); defaults to "RBB".
        steps: number of points to plot, counting the starting prior as the
            first one; defaults to 50.

    Returns:
        The figure produced by `plot` (not yet shown).
    """
    tokens = random.choice(["RBB", "RRB"])

    # Start with no preference between the two possible jars.
    priors = {"RBB": Fraction(1, 2), "RRB": Fraction(1, 2)}

    # `inference` is an endless generator, so take only the first `steps` items.
    output = list(islice(inference(tokens, priors, hypothesis), steps))
    return plot(output, hypothesis, tokens)


In [15]:
# Run ten independent simulations and display each resulting figure.
for _ in range(10):
    simulate().show()