In [1]:
import set_paths

In [2]:
from probability.discrete import Discrete, Conditional

# Bayesian Reasoning and Machine Learning

## 1.1 Probability Refresher 

### 1.1.1 Interpreting Conditional Probability

In [3]:
# Uniform distribution: each of the regions 1..20 gets probability 1/20.
darts = Discrete.from_probs(
    data=dict.fromkeys(range(1, 21), 1 / 20),
    variables='region',
)

In [4]:
# Inspect the table: every region should carry probability 1/20 = 0.05.
darts.data

region
1     0.05
2     0.05
3     0.05
4     0.05
5     0.05
6     0.05
7     0.05
8     0.05
9     0.05
10    0.05
11    0.05
12    0.05
13    0.05
14    0.05
15    0.05
16    0.05
17    0.05
18    0.05
19    0.05
20    0.05
Name: p(region), dtype: float64

In [5]:
# Reference value for the cells below: once region 20 is ruled out,
# 19 equally likely regions remain.
1 / 19

0.05263157894736842

In [6]:
# p(region = 5 | region != 20): condition first, then query.
darts.given(region__ne=20).p(region=5)

0.05263157894736841

In [7]:
# Same quantity from the definition p(A | B) = p(A, B) / p(B).
darts.p(region=5, region__ne=20) / darts.p(region__ne=20)

0.05263157894736841

In [8]:
# region = 5 already implies region != 20, so the joint in the numerator
# reduces to the marginal p(region = 5).
darts.p(region=5) / darts.p(region__ne=20)

0.05263157894736841

### 1.1.2 Probability Tables

In [9]:
# Counts per country (presumably population figures); the resulting
# distribution is labelled p(country).
country = Discrete.from_counts(
    dict(
        england=60_776_238,
        scotland=5_116_900,
        wales=2_980_700,
    ),
    'country',
)

In [10]:
# The counts appear here as probabilities (they sum to 1 up to rounding).
country.data

country
england     0.882429
scotland    0.074294
wales       0.043278
Name: p(country), dtype: float64

In [11]:
# p(language | country), written one row per language with the values in
# the column order (england, scotland, wales). Keys are (language, country).
language__given__country = Conditional.from_probs(
    data={
        (language, place): prob
        for language, row in (
            ('english', (0.95, 0.7, 0.6)),
            ('scottish', (0.04, 0.3, 0.0)),
            ('welsh', (0.01, 0.0, 0.4)),
        )
        for place, prob in zip(('england', 'scotland', 'wales'), row)
    },
    joint_variables='language',
    conditional_variables='country',
)

In [12]:
# One column per country; each column is a distribution over language
# and sums to 1.
language__given__country.data

country,england,scotland,wales
language,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
english,0.95,0.7,0.6
scottish,0.04,0.3,0.0
welsh,0.01,0.0,0.4


In [13]:
# Product rule: p(language, country) = p(language | country) * p(country).
language__country = language__given__country * country

In [14]:
# Joint table, indexed by (country, language).
language__country.data

country   language
england   english     0.838307
          scottish    0.035297
          welsh       0.008824
scotland  english     0.052006
          scottish    0.022288
          welsh       0.000000
wales     english     0.025967
          scottish    0.000000
          welsh       0.017311
Name: p(country, language), dtype: float64

## 1.2 Probabilistic Reasoning

#### Example 1.2

In [15]:
# Prior p(has_kj): 'yes' has probability 1e-5, 'no' takes the remainder.
has_kj = Discrete.from_probs(
    data=dict(yes=1e-5, no=1 - 1e-5),
    variables='has_kj',
)

In [16]:
# Show the prior over has_kj.
has_kj.data

has_kj
yes    0.00001
no     0.99999
Name: p(has_kj), dtype: float64

In [17]:
# p(eats_hbs | has_kj); keys are (eats_hbs, has_kj).
# Only the has_kj = 'yes' case is specified; no entries exist for has_kj = 'no'.
eats_hbs__given__has_kj = Conditional.from_probs(
    data={
        ('yes', 'yes'): 0.9,
        ('no', 'yes'): 0.1,
    },
    joint_variables='eats_hbs',
    conditional_variables='has_kj',
)

In [18]:
# The table has a single column (has_kj = 'yes'), because
# p(eats_hbs | has_kj = 'no') was never specified.
eats_hbs__given__has_kj.data

has_kj,yes
eats_hbs,Unnamed: 1_level_1
no,0.1
yes,0.9


###### 1) p(eats_hbs = yes) = 0.5

In [19]:
# Marginal p(eats_hbs) for part 1): both outcomes are equally likely.
eats_hbs = Discrete.from_probs(
    data=dict(yes=0.5, no=0.5),
    variables='eats_hbs',
)

In [20]:
# Show the marginal over eats_hbs.
eats_hbs.data

eats_hbs
yes    0.5
no     0.5
Name: p(eats_hbs), dtype: float64

In [21]:
# Bayes' rule: p(has_kj | eats_hbs) = p(eats_hbs | has_kj) * p(has_kj) / p(eats_hbs).
has_kj__given__eats_hbs = eats_hbs__given__has_kj * has_kj / eats_hbs

In [22]:
# The has_kj = 'no' rows come out as NaN — presumably because
# eats_hbs__given__has_kj has no column for has_kj = 'no' (TODO confirm).
# NOTE(review): the series is labelled p(has_kj, eats_hbs) although the values
# were computed as a posterior via Bayes' rule — confirm the library's labelling.
has_kj__given__eats_hbs.data

has_kj  eats_hbs
no      no               NaN
        yes              NaN
yes     no          0.000002
        yes         0.000018
Name: p(has_kj, eats_hbs), dtype: float64

In [23]:
# p(has_kj = 'yes' | eats_hbs = 'yes') = 0.9 * 1e-5 / 0.5
has_kj__given__eats_hbs.p(has_kj='yes', eats_hbs='yes')

1.8e-05

###### 2) p(eats_hbs = yes) = 0.001

In [24]:
# Marginal p(eats_hbs) for part 2): eating is now rare.
# NOTE: this rebinds `eats_hbs`, replacing the distribution used in part 1).
eats_hbs = Discrete.from_probs(
    data=dict(yes=0.001, no=0.999),
    variables='eats_hbs',
)

In [25]:
# Show the updated marginal over eats_hbs.
eats_hbs.data

eats_hbs
yes    0.001
no     0.999
Name: p(eats_hbs), dtype: float64

In [26]:
# Same Bayes'-rule computation as in part 1), now with the part 2) marginal.
# NOTE: rebinds `has_kj__given__eats_hbs`; the part 1) result is no longer reachable.
has_kj__given__eats_hbs = eats_hbs__given__has_kj * has_kj / eats_hbs

In [27]:
# As before, the has_kj = 'no' rows are NaN — presumably because
# eats_hbs__given__has_kj has no column for has_kj = 'no' (TODO confirm).
has_kj__given__eats_hbs.data

has_kj  eats_hbs
no      no               NaN
        yes              NaN
yes     no          0.000001
        yes         0.009000
Name: p(has_kj, eats_hbs), dtype: float64

In [28]:
# p(has_kj = 'yes' | eats_hbs = 'yes') = 0.9 * 1e-5 / 0.001
has_kj__given__eats_hbs.p(has_kj='yes', eats_hbs='yes')

0.009

#### Example 1.3

In [29]:
# Prior p(butler).
butler = Discrete.from_probs(
    data=dict(yes=0.6, no=0.4),
    variables='butler',
)

In [30]:
# Prior p(maid).
maid = Discrete.from_probs(
    data=dict(yes=0.2, no=0.8),
    variables='maid',
)

In [31]:
# Joint p(butler, maid) built as the product of the two marginals,
# i.e. butler and maid are treated as independent.
butler__and__maid = butler * maid

In [32]:
# Each entry is the product of the matching marginals, e.g. 0.6 * 0.2 = 0.12.
butler__and__maid.data

butler  maid
yes     yes     0.12
        no      0.48
no      yes     0.08
        no      0.32
Name: p(butler, maid), dtype: float64

In [33]:
# p(knife_used | butler, maid); keys are (knife_used, butler, maid).
# Each row lists the values for one knife_used outcome, in the
# (butler, maid) order given by the tuple passed to zip below.
knife__given__butler__and__maid = Conditional.from_probs(
    data={
        (knife, butler_state, maid_state): prob
        for knife, row in (
            ('yes', (0.3, 0.2, 0.6, 0.1)),
            ('no', (0.7, 0.8, 0.4, 0.9)),
        )
        for (butler_state, maid_state), prob in zip(
            (('no', 'no'), ('no', 'yes'), ('yes', 'no'), ('yes', 'yes')),
            row,
        )
    },
    joint_variables='knife_used',
    conditional_variables=['butler', 'maid'],
)

In [34]:
# One column per (butler, maid) combination; each column sums to 1.
knife__given__butler__and__maid.data

butler,no,no,yes,yes
maid,no,yes,no,yes
knife_used,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
no,0.7,0.8,0.4,0.9
yes,0.3,0.2,0.6,0.1


In [35]:
# Product rule: p(butler, maid, knife_used) = p(knife_used | butler, maid) * p(butler, maid).
butler__and__maid__and__knife = knife__given__butler__and__maid * butler__and__maid

In [36]:
# Full joint over (butler, maid, knife_used); the eight entries sum to 1.
butler__and__maid__and__knife.data

butler  maid  knife_used
no      no    no            0.224
              yes           0.096
        yes   no            0.064
              yes           0.016
yes     no    no            0.192
              yes           0.288
        yes   no            0.108
              yes           0.012
Name: p(butler, maid, knife_used), dtype: float64

In [37]:
# p(butler = 'yes' | knife_used = 'yes'): condition the joint on the knife
# having been used, then read off the probability for the butler.
butler__given__knife = (
    butler__and__maid__and__knife
    .given(knife_used='yes')
    .p(butler='yes')
)

In [38]:
# (0.288 + 0.012) / (0.096 + 0.016 + 0.288 + 0.012) = 0.300 / 0.412
butler__given__knife

0.7281553398058251