In [1]:
from __future__ import print_function, division

%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import math
import numpy as np
from scipy.special import gamma

from thinkbayes2 import Pmf, Suite
import thinkplot

In [3]:
pmf = Pmf()
for x in range(6):
    pmf.Set(x, 1/6.0)

In [4]:
pmf.Print()

0 0.16666666666666666
1 0.16666666666666666
2 0.16666666666666666
3 0.16666666666666666
4 0.16666666666666666
5 0.16666666666666666


# ThinkBayes CH2
## 2.1 Words

In [13]:
word_list = ["I", "have", "some", "words", "here", "yes", "I", "have"]
words = Pmf()
for word in word_list:
    words.Incr(word, 1)
    
words.Normalize()
words.Print()

print(words.Prob("I"))

I 0.25
have 0.25
here 0.125
some 0.125
words 0.125
yes 0.125
0.25


## 2.2 Cookie

In [14]:
BOWL1 = 'Bowl 1'
BOWL2 = 'Bowl 2'
bowls = Pmf()
bowls.Set(BOWL1, 0.5)
bowls.Set(BOWL2, 0.5)

bowls[BOWL1] *= 0.75
bowls[BOWL2] *= 0.5

bowls.Normalize()

0.625

In [15]:
print(bowls.Prob(BOWL1))

0.6000000000000001


## 2.3 Bayes framework

In [21]:
class Cookie(Pmf):
    def __init__(self, hypos):
        Pmf.__init__(self)
        for hypo in hypos:
            self.Set(hypo, 1)
        self.Normalize()
    
    def Update(self, data):
        for hypo in self.Values():
            like = self.Likelihood(data, hypo)
            self.Mult(hypo, like)
        self.Normalize()
    
    mixes = {
        BOWL1: dict(vanilla=0.75, chocolate=0.25),
        BOWL2: dict(vanilla=0.5, chocolate=0.5),
    }
    
    def Likelihood(self, data, hypo):
        mix = self.mixes[hypo]
        like = mix[data]
        return like


In [25]:
hypos = [BOWL1, BOWL2]
cookie_bowls = Cookie(hypos)
cookie_bowls.Print()

Bowl 1 0.5
Bowl 2 0.5


In [26]:
cookie_bowls.Update('vanilla')

In [27]:
cookie_bowls.Print()

Bowl 1 0.6000000000000001
Bowl 2 0.4


In [28]:
VANILLA = 'vanilla'
CHOCOLATE = 'chocolate'
dataset = [VANILLA, CHOCOLATE, VANILLA]

for data in dataset:
    cookie_bowls.Update(data)
    
cookie_bowls.Print()

Bowl 1 0.627906976744186
Bowl 2 0.37209302325581395


## 2.4 Monty Hall

In [30]:
class Monty(Pmf):
    def __init__(self, hypos):
        Pmf.__init__(self)
        for hypo in hypos:
            self.Set(hypo, 1)
        self.Normalize()

    def Update(self, data):
        for hypo in self.Values():
            like = self.Likelihood(data, hypo)
            self.Mult(hypo, like)
        self.Normalize()
    
    def Likelihood(self, data, hypo):
        if hypo == data:
            return 0
        elif hypo == 'A':
            return 0.5
        else:
            return 1

monty_hypos = 'ABC'
monty = Monty(monty_hypos)

data = 'B'
monty.Update(data)

monty.Print()

A 0.3333333333333333
B 0.0
C 0.6666666666666666


## 2.5 Encapsulating the framework

In [31]:
class MySuite(Pmf):
    """Represents a suite of hypotheses and their probs"""
    
    def __init__(self, hypo=tuple()):
        """Initializes the distribution"""
        
    def Update(self, data):
        """Updates each hypothesis based on the data"""
    
    def Print(self):
        """Prints the hypotheses and their probabilities"""


### or just use the thinkbayes2 one

In [32]:
from thinkbayes2 import Suite

class MontySuite(Suite):
    def Likelihood(self, data, hypo):
        if hypo == data:
            return 0
        elif hypo == 'A':
            return 0.5
        else:
            return 1
        
suite = MontySuite('ABC')
suite.Update('B')
suite.Print()

A 0.3333333333333333
B 0.0
C 0.6666666666666666


## 2.6 MMMM problem

In [34]:
mix94 = dict(brown=30, yellow=20, red=20, green=10, orange=10, tan=10)
mix96 = dict(blue=24, green=20, orange=16, yellow=14, red=13, brown=13)

hypoA = dict(bag1=mix94, bag2=mix96)
hypoB = dict(bag1=mix96, bag2=mix94)



class MMMM(Suite):
    hypotheses = dict(A=hypoA, B=hypoB)
    
    def Likelihood(self, data, hypo):
        bag, color = data
        mix = self.hypotheses[hypo][bag]
        like = mix[color]
        return like

suite = MMMM('AB')

suite.Update(('bag1', 'yellow'))
suite.Update(('bag2', 'green'))

suite.Print()

A 0.7407407407407407
B 0.2592592592592592


## 2.7 Discussion

Suite is an **abstract type**, because it's pretty much useless by itself, but provides the framework to build **concrete types** like *MMMM* and *MontySuite*


## 2.8 Cookies being eaten

In [None]:
# do a real problem now...

## REAL PROBLEMS ABOUT ELVIS
3) This one is from one of my favorite books, David MacKay's "Information Theory, Inference, and Learning Algorithms":
Elvis Presley had a twin brother who died at birth.  What is the probability that Elvis was an identical twin?
To answer this one, you need some background information: According to the Wikipedia article on twins:  ``Twins are estimated to be approximately 1.9% of the world population, with monozygotic twins making up 0.2% of the total---and 8% of all twins.''


In [35]:

class Elvis(Suite):
    def Likelihood(self, data, hypo):
        if data == 'male':
            return .50
        else: # data == 'female'
            return .25
    
elvis = Elvis(dict(identical_twin=0.019, just_twin=0.002))

In [38]:
elvis.Normalize()
elvis.Update('male')
elvis.Print()

identical_twin 0.9047619047619047
just_twin 0.09523809523809523
