In [3]:
import sys
sys.version

'3.8.8 (tags/v3.8.8:024d805, Feb 19 2021, 13:18:16) [MSC v.1928 64 bit (AMD64)]'

In [4]:
import numpy as np

# Posterior Probability Computation

In [5]:
# Hypotheses: one entry per candidate die, identified by its number of faces.
dice_face = np.array([4, 6, 8, 12, 20])
n_dice = dice_face.size  # number of candidate dice (hypotheses)

# Uniform prior: each die gets probability 1 / n_dice before any roll is seen.
P_hyp = np.full(n_dice, 1.0 / n_dice)
P_hyp

array([0.2, 0.2, 0.2, 0.2, 0.2])

In [6]:
# A single observed roll.
obs = 6

# Likelihood P(obs | die) for all hypotheses at once: a die with fewer faces
# than the observed value gets likelihood 0; every other die gets
# 1 / (number of faces).
P_liklihood = np.where(obs > dice_face, 0.0, 1.0 / dice_face)
P_liklihood

array([0.        , 0.16666667, 0.125     , 0.08333333, 0.05      ])

In [7]:
# Normalization term Z: the sum over all hypotheses h of the un-normalized
# posterior P(data|h) * P(h).
Z = (P_liklihood * P_hyp).sum()
# Posterior: un-normalized posterior divided by Z.
P_posterior = P_liklihood * P_hyp / Z
P_posterior

array([0.        , 0.39215686, 0.29411765, 0.19607843, 0.11764706])

## Update Posterior Probability with more data

In [8]:
# What if you keep rolling the same die and observe the sequence [6, 8, 7, 7, 5, 4]?
obs_seq = [6,8,7,7,5,4]

In [9]:
def update(P_prior, obs, faces=None):
    """Apply one Bayesian update to the belief over which die is being rolled.

    Parameters
    ----------
    P_prior : array-like of float
        Current probability of each candidate die, ordered like ``faces``.
    obs : int
        Value shown by the latest roll.
    faces : array-like of int, optional
        Number of faces of each candidate die. When omitted, the
        notebook-level ``dice_face`` array is used.

    Returns
    -------
    numpy.ndarray
        Normalized posterior probability of each die after observing ``obs``.

    Raises
    ------
    ValueError
        If every die has zero posterior mass for ``obs`` (for example a roll
        larger than the biggest die, or smaller than 1), in which case the
        posterior cannot be normalized.
    """
    faces = np.asarray(dice_face if faces is None else faces)
    prior = np.asarray(P_prior, dtype=float)

    # A die can only show values in 1..faces; each of them with probability
    # 1 / faces.  Anything else has likelihood 0.
    likelihood = np.where((faces >= obs) & (obs >= 1), 1.0 / faces, 0.0)

    posterior = likelihood * prior
    evidence = posterior.sum()
    if evidence == 0:
        # Dividing by zero here would silently return an all-NaN posterior.
        raise ValueError(
            f"observation {obs!r} has zero probability under every hypothesis"
        )
    return posterior / evidence

In [10]:
# Start from the posterior after the first roll and fold in the remaining
# rolls one at a time: each posterior becomes the prior for the next roll.
post = P_posterior
for roll in obs_seq:  # named `roll` so the notebook-level `obs` is not overwritten
    post = update(post, roll)
    print(roll, post)

6 [0.         0.52562418 0.2956636  0.13140604 0.04730618]
8 [0.         0.         0.73513396 0.21781747 0.04704857]
7 [0.         0.         0.81757401 0.1614961  0.02092989]
7 [0.         0.         0.87571253 0.11532017 0.0089673 ]
5 [0.         0.         0.91584527 0.08040343 0.0037513 ]
4 [0.         0.         0.94324845 0.05520613 0.00154542]


## Another way: using the assumption of independent observation
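The observations are assumed to be conditionally independent given the die, so the joint likelihood factorizes:
$$
    P(a, b \mid \theta_i) = P(a \mid \theta_i) \times P(b \mid \theta_i)
$$
where $\theta_i$ is the probability of any single outcome $x$ of the $i$-th die, i.e. $\theta_i = 1/n_i$ for a fair die with $n_i$ faces.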

For example, suppose we picked the 6-faced die and rolled the sequence [2, 4, 6]. Then the total likelihood is 
$$
    P_\text{total} = P(2 \mid \text{6-die}) \times P(4 \mid \text{6-die}) \times P(6 \mid \text{6-die}) = 1 / 6^3
$$
If the sequence were [2, 4, 6, 8] (impossible for a 6-faced die, since it can never show an 8), the total likelihood would be
$$
    P_\text{total} = P(2 \mid \text{6-die}) \times P(4 \mid \text{6-die}) \times P(6 \mid \text{6-die}) \times P(8 \mid \text{6-die}) = 0
$$

In [11]:
# All seven rolls: the first roll (6) followed by the sequence [6, 8, 7, 7, 5, 4].
total_obs_seq = np.array([6, 6, 8, 7, 7, 5, 4])

# Running product of per-roll likelihoods, one entry per die.  It is created
# as a float array: np.ones_like(dice_face) would start as an integer array.
total_likelihood = np.ones(n_dice)
for roll in total_obs_seq:  # named `roll` so the notebook-level `obs` is not overwritten
    # Likelihood of this roll under each die: 0 if the die has fewer faces
    # than the roll, otherwise 1 / (number of faces).
    total_likelihood = total_likelihood * np.where(roll > dice_face, 0.0, 1.0 / dice_face)

total_likelihood

array([0.00000000e+00, 0.00000000e+00, 4.76837158e-07, 2.79081647e-08,
       7.81250000e-10])

In [12]:
# Weight the joint likelihood by the prior, then rescale so the entries sum to 1.
total_posterior = total_likelihood * P_hyp
total_posterior /= total_posterior.sum()
total_posterior

array([0.        , 0.        , 0.94324845, 0.05520613, 0.00154542])

**Note:** The result is the same as the final posterior obtained from the sequential update.

---

# Now with numpyro and JAX
- numpyro: http://num.pyro.ai/en/stable/
    - very fast MCMC inference compared to pyro and pymc3
    - used on CPU only here (GPU/TPU use needs a matching jaxlib build).
    - Linux/macOS only at the time of writing. No Windows 10 support!
    - developed based on pyro and JAX
- JAX : https://jax.readthedocs.io/en/latest/notebooks/thinking_in_jax.html
    - another numpy with autograd
    - JAX arrays are always immutable.
        - For updating individual elements, JAX provides an indexed update syntax that returns an updated copy:
        ```
        y = x.at[0].set(10)
        ```
- tensorflow

In [1]:
import numpyro
import numpyro as npr
import jax.numpy as jnp
import numpy as np

## Posterior Probability Computation

In [2]:
# hypotheses representation
dice_face = jnp.array([4, 6, 8, 12, 20])
n_dice =  len(dice_face)  # dice type: 

P_hyp = jnp.array([1./n_dice]*n_dice)
P_hyp

DeviceArray([0.2, 0.2, 0.2, 0.2, 0.2], dtype=float32)

In [3]:
# A single observed roll.
obs = 6

# Likelihood P(obs | die) for all hypotheses at once: a die with fewer faces
# than the observed value gets likelihood 0; every other die gets
# 1 / (number of faces).
P_liklihood = jnp.where(obs > dice_face, 0.0, 1.0 / dice_face)
P_liklihood

DeviceArray([0.        , 0.16666667, 0.125     , 0.08333334, 0.05      ],            dtype=float32)

In [4]:
# Normalization constant Z: the sum over all hypotheses h of the
# un-normalized posterior P(data|h) * P(h).
Z = (P_liklihood * P_hyp).sum()
# Posterior: un-normalized posterior divided by Z.
P_posterior = P_liklihood * P_hyp / Z
P_posterior

DeviceArray([0.        , 0.39215687, 0.29411766, 0.19607843, 0.11764707],            dtype=float32)

## Update Posterior Probability with more data

In [5]:
# What if you keep rolling the same die and observe the sequence [6, 8, 7, 7, 5, 4]?
obs_seq = [6,8,7,7,5,4]

In [6]:
def update(P_prior, obs, faces=None):
    """Apply one Bayesian update to the belief over which die is being rolled.

    Parameters
    ----------
    P_prior : array-like of float
        Current probability of each candidate die, ordered like ``faces``.
    obs : int
        Value shown by the latest roll.
    faces : array-like of int, optional
        Number of faces of each candidate die. When omitted, the
        notebook-level ``dice_face`` array is used.

    Returns
    -------
    jax array
        Normalized posterior probability of each die after observing ``obs``.

    Raises
    ------
    ValueError
        If every die has zero posterior mass for ``obs`` (for example a roll
        larger than the biggest die, or smaller than 1), in which case the
        posterior cannot be normalized.
    """
    faces = jnp.asarray(dice_face if faces is None else faces)
    prior = jnp.asarray(P_prior)

    # A die can only show values in 1..faces; each of them with probability
    # 1 / faces.  Anything else has likelihood 0.
    likelihood = jnp.where((faces >= obs) & (obs >= 1), 1.0 / faces, 0.0)

    posterior = likelihood * prior
    evidence = posterior.sum()
    if float(evidence) == 0.0:
        # Dividing by zero here would silently return an all-NaN posterior.
        raise ValueError(
            f"observation {obs!r} has zero probability under every hypothesis"
        )
    return posterior / evidence

In [7]:
# Start from the posterior after the first roll and fold in the remaining
# rolls one at a time: each posterior becomes the prior for the next roll.
post = P_posterior
for roll in obs_seq:  # named `roll` so the notebook-level `obs` is not overwritten
    post = update(post, roll)
    print(roll, post)

6 [0.         0.5256242  0.29566363 0.13140605 0.04730618]
8 [0.         0.         0.73513395 0.21781747 0.04704857]
7 [0.         0.         0.81757396 0.1614961  0.02092989]
7 [0.         0.         0.8757126  0.11532018 0.0089673 ]
5 [0.         0.         0.9158452  0.08040343 0.0037513 ]
4 [0.         0.         0.94324845 0.05520613 0.00154542]


## Another way: using the assumption of independent observation
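The observations are assumed to be conditionally independent given the die, so the joint likelihood factorizes:
$$
    P(a, b \mid \theta_i) = P(a \mid \theta_i) \times P(b \mid \theta_i)
$$
where $\theta_i$ is the probability of any single outcome $x$ of the $i$-th die, i.e. $\theta_i = 1/n_i$ for a fair die with $n_i$ faces.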

In [9]:
# All seven rolls: the first roll (6) followed by the sequence [6, 8, 7, 7, 5, 4].
total_obs_seq = jnp.array([6, 6, 8, 7, 7, 5, 4])

# Running product of per-roll likelihoods, one entry per die.  It is created
# as a float array: jnp.ones_like(dice_face) would start as an integer array.
total_likelihood = jnp.ones(n_dice)
for roll in total_obs_seq:  # named `roll` so the notebook-level `obs` is not overwritten
    # Likelihood of this roll under each die: 0 if the die has fewer faces
    # than the roll, otherwise 1 / (number of faces).
    total_likelihood = total_likelihood * jnp.where(roll > dice_face, 0.0, 1.0 / dice_face)

total_likelihood

DeviceArray([0.0000000e+00, 0.0000000e+00, 4.7683716e-07, 2.7908172e-08,
             7.8125012e-10], dtype=float32)

In [10]:
# Weight the joint likelihood by the prior, then rescale so the entries sum to 1.
total_posterior = total_likelihood * P_hyp
total_posterior = total_posterior / jnp.sum(total_posterior)
total_posterior

DeviceArray([0.        , 0.        , 0.94324845, 0.05520614, 0.00154542],            dtype=float32)

**Note:** The result is the same as the final posterior obtained from the sequential update.

---

# Now let's do it again with pytorch and pyro

In [13]:
import torch
import torch as th
import pyro

## Posterior Probability Computation

In [14]:
# hypotheses representation
dice_face = th.tensor([4, 6, 8, 12, 20])
n_dice =  len(dice_face)  # dice type: 

P_hyp = th.tensor([1./n_dice]*n_dice)
P_hyp

tensor([0.2000, 0.2000, 0.2000, 0.2000, 0.2000])

In [15]:
# A single observed roll.
obs = 6

# Likelihood P(obs | die) for all hypotheses at once: 1 / (number of faces),
# multiplied by a boolean mask that zeroes out every die with fewer faces
# than the observed value.
P_liklihood = (1.0 / dice_face) * (dice_face >= obs)
P_liklihood

tensor([0.0000, 0.1667, 0.1250, 0.0833, 0.0500])

In [16]:
# Normalization constant Z: the sum over all hypotheses h of the
# un-normalized posterior P(data|h) * P(h).
Z = (P_liklihood * P_hyp).sum()
# Posterior: un-normalized posterior divided by Z.
P_posterior = P_liklihood * P_hyp / Z
P_posterior

tensor([0.0000, 0.3922, 0.2941, 0.1961, 0.1176])

## Update Posterior Probability with more data

In [17]:
# What if you keep rolling the same die and observe the sequence [6, 8, 7, 7, 5, 4]?
obs_seq = [6,8,7,7,5,4]

In [18]:
def update(P_prior, obs, faces=None):
    """Apply one Bayesian update to the belief over which die is being rolled.

    Parameters
    ----------
    P_prior : torch.Tensor of float
        Current probability of each candidate die, ordered like ``faces``.
    obs : int
        Value shown by the latest roll.
    faces : torch.Tensor or sequence of int, optional
        Number of faces of each candidate die. When omitted, the
        notebook-level ``dice_face`` tensor is used.

    Returns
    -------
    torch.Tensor
        Normalized posterior probability of each die after observing ``obs``.

    Raises
    ------
    ValueError
        If every die has zero posterior mass for ``obs`` (for example a roll
        larger than the biggest die, or smaller than 1), in which case the
        posterior cannot be normalized.
    """
    faces = torch.as_tensor(dice_face if faces is None else faces)
    prior = torch.as_tensor(P_prior)

    # A die can only show values in 1..faces; each of them with probability
    # 1 / faces.  The mask zeroes dice that are too small, and the scalar
    # factor zeroes everything when the roll is below 1.
    likelihood = (faces >= obs).to(prior.dtype) / faces * float(obs >= 1)

    posterior = likelihood * prior
    evidence = posterior.sum()
    if float(evidence) == 0.0:
        # Dividing by zero here would silently return an all-NaN posterior.
        raise ValueError(
            f"observation {obs!r} has zero probability under every hypothesis"
        )
    return posterior / evidence

In [19]:
# Start from the posterior after the first roll and fold in the remaining
# rolls one at a time: each posterior becomes the prior for the next roll.
post = P_posterior
for roll in obs_seq:  # named `roll` so the notebook-level `obs` is not overwritten
    post = update(post, roll)
    print(roll, post)

6 tensor([0.0000, 0.5256, 0.2957, 0.1314, 0.0473])
8 tensor([0.0000, 0.0000, 0.7351, 0.2178, 0.0470])
7 tensor([0.0000, 0.0000, 0.8176, 0.1615, 0.0209])
7 tensor([0.0000, 0.0000, 0.8757, 0.1153, 0.0090])
5 tensor([0.0000, 0.0000, 0.9158, 0.0804, 0.0038])
4 tensor([0.0000, 0.0000, 0.9432, 0.0552, 0.0015])


## Another way: using the assumption of independent observation
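The observations are assumed to be conditionally independent given the die, so the joint likelihood factorizes:
$$
    P(a, b \mid \theta_i) = P(a \mid \theta_i) \times P(b \mid \theta_i)
$$
where $\theta_i$ is the probability of any single outcome $x$ of the $i$-th die, i.e. $\theta_i = 1/n_i$ for a fair die with $n_i$ faces.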

In [20]:
# All seven rolls: the first roll (6) followed by the sequence [6, 8, 7, 7, 5, 4].
total_obs_seq = th.tensor([6, 6, 8, 7, 7, 5, 4])

# Running product of per-roll likelihoods, one entry per die.  It is created
# as a float tensor: th.ones_like(dice_face) would start as an integer tensor.
total_likelihood = th.ones(n_dice)
for roll in total_obs_seq:  # named `roll` so the notebook-level `obs` is not overwritten
    # Likelihood of this roll under each die: 1 / (number of faces), masked
    # to 0 for every die with fewer faces than the roll.
    total_likelihood = total_likelihood * ((1.0 / dice_face) * (dice_face >= roll))

total_likelihood

tensor([0.0000e+00, 0.0000e+00, 4.7684e-07, 2.7908e-08, 7.8125e-10])

In [21]:
# Weight the joint likelihood by the prior, then rescale so the entries sum to 1.
total_posterior = total_likelihood * P_hyp
total_posterior = total_posterior / th.sum(total_posterior)
total_posterior

tensor([0.0000, 0.0000, 0.9432, 0.0552, 0.0015])

**Note:** The result is the same as the final posterior obtained from the sequential update.