In [1]:
import set_paths
from probability.discrete import DiscreteDistribution, ConditionalTable

# Bayesian Reasoning and Machine Learning

## 1.1 Probability Refresher 

### 1.1.1 Interpreting Conditional Probability

In [2]:
# Uniform distribution over regions 1..20: every region gets probability 1/20.
darts = DiscreteDistribution.from_dict(
    data=dict.fromkeys(range(1, 21), 1 / 20),
    var_names='region',
)
print(darts)

DiscreteDistribution: P(region)
region
1     0.05
2     0.05
3     0.05
4     0.05
5     0.05
6     0.05
7     0.05
8     0.05
9     0.05
10    0.05
11    0.05
12    0.05
13    0.05
14    0.05
15    0.05
16    0.05
17    0.05
18    0.05
19    0.05
20    0.05
Name: p, dtype: float64
sum: 1.0000000000000002


In [3]:
# Reference value for the cells below: once region 20 is excluded,
# 19 equally likely regions remain, so each has probability 1/19.
1 / 19

0.05263157894736842

In [4]:
# Condition on region != 20 (the `__ne` suffix), then query region 5.
# The result agrees with 1/19 up to floating-point rounding.
darts.given(region__ne=20).p(region=5)

0.05263157894736841

In [5]:
# Same quantity via the definition of conditional probability:
# P(region=5, region!=20) / P(region!=20).
darts.p(region=5, region__ne=20) / darts.p(region__ne=20)

0.05263157894736841

In [6]:
# region=5 already implies region!=20, so the joint in the numerator
# reduces to P(region=5); the value is unchanged.
darts.p(region=5) / darts.p(region__ne=20)

0.05263157894736841

### 1.1.2 Probability Tables

In [7]:
# Head counts per country (presumably populations); `from_counts` turns
# them into the probabilities P(country) shown in the output.
p_c = DiscreteDistribution.from_counts(
    dict(england=60_776_238, scotland=5_116_900, wales=2_980_700),
    'country',
)
print(p_c)

DiscreteDistribution: P(country)
country
england     0.882429
scotland    0.074294
wales       0.043278
Name: p, dtype: float64
sum: 0.9999999999999999


In [8]:
# P(language | country), written one row per language with the columns in
# the order england, scotland, wales. Each country's column sums to 1,
# which is why the printed table total is 3.0.
p_l__c = ConditionalTable.from_dict({
    (language, country): prob
    for language, probs in (
        ('english', (0.95, 0.7, 0.6)),
        ('scottish', (0.04, 0.3, 0.0)),
        ('welsh', (0.01, 0.0, 0.4)),
    )
    for country, prob in zip(('england', 'scotland', 'wales'), probs)
}, ['language', 'country'], 'country')
print(p_l__c)

ConditionalTable: P(language|country)
language  country 
english   england     0.95
          scotland    0.70
          wales       0.60
scottish  england     0.04
          scotland    0.30
          wales       0.00
welsh     england     0.01
          scotland    0.00
          wales       0.40
Name: p, dtype: float64
sum: 3.0


In [9]:
# Product rule: P(language, country) = P(language | country) * P(country).
p_lc = p_l__c * p_c
print(p_lc)

DiscreteDistribution: P(language,country)
language  country 
english   england     0.838307
          scotland    0.052006
          wales       0.025967
scottish  england     0.035297
          scotland    0.022288
          wales       0.000000
welsh     england     0.008824
          scotland    0.000000
          wales       0.017311
Name: p, dtype: float64
sum: 0.9999999999999999


## 1.2 Probabilistic Reasoning

#### Example 1.2

In [10]:
# Prior P(has_kj): a base rate of 1e-5 for 'yes', the remainder for 'no'.
p_kj = DiscreteDistribution.from_dict(
    data=dict(yes=1e-5, no=1 - 1e-5),
    var_names='has_kj',
)
print(p_kj)

DiscreteDistribution: P(has_kj)
has_kj
yes    0.00001
no     0.99999
Name: p, dtype: float64
sum: 1.0


In [11]:
# Likelihood P(eats_hbs | has_kj). Only the has_kj='yes' column is given,
# so the table carries no information for has_kj='no'.
p_h__kj = ConditionalTable.from_dict(
    {(eats_hbs, 'yes'): prob for eats_hbs, prob in (('yes', 0.9), ('no', 0.1))},
    var_names=['eats_hbs', 'has_kj'],
    cond_var_names='has_kj',
)
print(p_h__kj)

ConditionalTable: P(eats_hbs|has_kj)
eats_hbs  has_kj
yes       yes       0.9
no        yes       0.1
Name: p, dtype: float64
sum: 1.0


##### 1) Assuming P(eats_hbs = yes) = 0.5

In [12]:
# Case 1: marginal P(eats_hbs) with an even split between 'yes' and 'no'.
p_h = DiscreteDistribution.from_dict(
    dict(yes=0.5, no=0.5),
    var_names='eats_hbs',
)
print(p_h)

DiscreteDistribution: P(eats_hbs)
eats_hbs
yes    0.5
no     0.5
Name: p, dtype: float64
sum: 1.0


In [13]:
# Bayes' rule: P(has_kj | eats_hbs) = P(eats_hbs | has_kj) * P(has_kj) / P(eats_hbs).
# Because the likelihood table only covers has_kj='yes', the result holds only
# the has_kj='yes' rows and its printed sum is not 1.
p_kj__h = p_h__kj * p_kj / p_h
print(p_kj__h)

ConditionalTable: P(has_kj|eats_hbs)
has_kj  eats_hbs
yes     no          0.000002
        yes         0.000018
Name: p, dtype: float64
sum: 2e-05


In [14]:
# Posterior of interest: P(has_kj='yes' | eats_hbs='yes').
print(p_kj__h.p(has_kj='yes', eats_hbs='yes'))

1.8e-05


##### 2) Assuming P(eats_hbs = yes) = 0.001

In [15]:
# Case 2: 'yes' is now rare (0.001). This rebinds `p_h`, so the cells that
# follow must be re-run to pick up the new marginal.
p_h = DiscreteDistribution.from_dict(
    dict(yes=0.001, no=0.999),
    var_names='eats_hbs',
)
print(p_h)

DiscreteDistribution: P(eats_hbs)
eats_hbs
yes    0.001
no     0.999
Name: p, dtype: float64
sum: 1.0


In [16]:
# Bayes' rule again, using whichever `p_h` is currently bound (here the
# 0.001 / 0.999 marginal). Rebinds `p_kj__h`; as before only the
# has_kj='yes' rows are present, so the printed sum is not 1.
p_kj__h = p_h__kj * p_kj / p_h
print(p_kj__h)

ConditionalTable: P(has_kj|eats_hbs)
has_kj  eats_hbs
yes     no          0.000001
        yes         0.009000
Name: p, dtype: float64
sum: 0.009001001001001001


In [17]:
# Posterior P(has_kj='yes' | eats_hbs='yes') under the updated marginal.
print(p_kj__h.p(has_kj='yes', eats_hbs='yes'))

0.009


#### Example 1.3

In [68]:
# Prior P(butler).
p_b = DiscreteDistribution.from_dict(
    dict(yes=0.6, no=0.4),
    var_names='butler',
)
print(p_b)

DiscreteDistribution: P(butler)
butler
yes    0.6
no     0.4
Name: p, dtype: float64
sum: 1.0


In [69]:
# Prior P(maid).
p_m = DiscreteDistribution.from_dict(
    dict(yes=0.2, no=0.8),
    var_names='maid',
)
print(p_m)

DiscreteDistribution: P(maid)
maid
yes    0.2
no     0.8
Name: p, dtype: float64
sum: 1.0


In [70]:
# Joint prior P(butler, maid) assuming independence: each entry is
# P(butler) * P(maid), using the same values as `p_b` and `p_m`.
# NOTE(review): this is spelled out by hand instead of `p_b * p_m`,
# presumably because multiplying two marginals is not supported - confirm.
p_bm = DiscreteDistribution.from_dict({
    (butler, maid): prob_butler * prob_maid
    for butler, prob_butler in (('yes', 0.6), ('no', 0.4))
    for maid, prob_maid in (('yes', 0.2), ('no', 0.8))
}, var_names=['butler', 'maid'])
print(p_bm)

DiscreteDistribution: P(butler,maid)
butler  maid
yes     yes     0.12
        no      0.48
no      yes     0.08
        no      0.32
Name: p, dtype: float64
sum: 1.0


In [71]:
# P(knife_used | butler, maid). Inner tuples are (butler, maid, probability);
# for each (butler, maid) pair the 'yes' and 'no' entries sum to 1.
p_k__bm = ConditionalTable.from_dict(
    {
        (knife_used, butler, maid): prob
        for knife_used, rows in (
            ('yes', (
                ('no', 'no', 0.3),
                ('no', 'yes', 0.2),
                ('yes', 'no', 0.6),
                ('yes', 'yes', 0.1),
            )),
            ('no', (
                ('no', 'no', 0.7),
                ('no', 'yes', 0.8),
                ('yes', 'no', 0.4),
                ('yes', 'yes', 0.9),
            )),
        )
        for butler, maid, prob in rows
    },
    var_names=['knife_used', 'butler', 'maid'],
    cond_var_names=['butler', 'maid'],
)
print(p_k__bm)

ConditionalTable: P(knife_used|butler,maid)
knife_used  butler  maid
yes         no      no      0.3
                    yes     0.2
            yes     no      0.6
                    yes     0.1
no          no      no      0.7
                    yes     0.8
            yes     no      0.4
                    yes     0.9
Name: p, dtype: float64
sum: 4.0


In [72]:
# Product rule: P(knife_used, butler, maid) = P(butler, maid) * P(knife_used | butler, maid).
# The result is the full joint (see the printed header), so it is bound to
# `p_kbm`, following the notebook's naming scheme for joints (`p_lc`, `p_bm`).
p_kbm = p_bm * p_k__bm
# Backward-compatible alias: the old name reads as "P(maid | knife)", which
# this object is not, but other cells still refer to the joint as `p_m__k`.
p_m__k = p_kbm
print(p_kbm)

DiscreteDistribution: P(knife_used,butler,maid)
knife_used  butler  maid
no          no      no      0.224
                    yes     0.064
            yes     no      0.192
                    yes     0.108
yes         no      no      0.096
                    yes     0.016
            yes     no      0.288
                    yes     0.012
Name: p, dtype: float64
sum: 1.0


In [73]:
# Condition the joint on knife_used='yes' to get P(butler, maid | knife_used=yes).
# Despite its name, `p_m__k` holds the joint P(knife_used, butler, maid).
p_bm__k_yes = p_m__k.given(knife_used='yes')
print(p_bm__k_yes)

DiscreteDistribution: P(butler,maid|knife_used=yes)
butler  maid
no      no      0.233010
        yes     0.038835
yes     no      0.699029
        yes     0.029126
Name: p, dtype: float64
sum: 0.9999999999999999


In [74]:
# Marginalise the maid out, leaving a distribution over butler given knife_used='yes'.
# NOTE(review): the name suggests only butler='yes', but the object holds both
# outcomes; the printed header reads P(butler) although the values are still
# conditional on knife_used='yes'.
p_b_yes__k_yes = p_bm__k_yes.margin('butler')
print(p_b_yes__k_yes)

DiscreteDistribution: P(butler)
butler
no     0.271845
yes    0.728155
Name: p, dtype: float64
sum: 0.9999999999999999


In [76]:
# Final answer: the scalar P(butler='yes' | knife_used='yes').
# NOTE(review): despite the `p_m_` prefix, this value concerns the butler only.
p_m_butler__k_yes = p_b_yes__k_yes.p(butler='yes')
p_m_butler__k_yes

0.7281553398058251

### 1.3.1 Two dice: what were the individual scores?