In [1]:
word_meaning = {1, 2, 7}
# this word can refer to entities 1, 2, and 7

In [2]:
class Shorthair:
    """A shorthair cat with a name and a fixed personality."""

    def __init__(self, name):
        # Every shorthair gets the same personality; only the name varies.
        self.personality = 'strange'
        self.name = name

    def speak(self):
        """Print this cat's name followed by its meow."""
        announcement = self.name + ' goes, "Meow."'
        print(announcement)

Sauce = Shorthair('Saucy')
Ketchup = Shorthair('Ketchup')
Mayo = Shorthair('Mayo')

In [3]:
cat = {Sauce, Ketchup, Mayo}
# cat is a noun, and it can refer to the shorthair cats, Sauce, Ketchup, and Mayo
# for this example, the set of possible entities that cat can refer to is Sauce, Ketchup, and Mayo

In [4]:
cat = {Sauce, Ketchup, Mayo, Mayo}
cat

{<__main__.Shorthair at 0x104e5f8d0>,
 <__main__.Shorthair at 0x104e61f90>,
 <__main__.Shorthair at 0x104e63510>}

In [5]:
from numpy import prod

%matplotlib inline
import matplotlib.pyplot as plt
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('svg', 'pdf')

  set_matplotlib_formats('svg', 'pdf')


In [6]:
def normalize_probabilities(probs):
    """Rescale a collection of weights so that they sum to 1.

    Args:
        probs: iterable of numbers (for example unnormalised posteriors).
            One-shot iterables such as generators are accepted.

    Returns:
        A new list with the same length and order as ``probs`` in which every
        entry has been divided by the sum of all entries. An empty input
        yields an empty list.

    Raises:
        ZeroDivisionError: if ``probs`` is non-empty but sums to zero, so
            there is no probability mass to redistribute.
    """
    # Materialise first: sum() would otherwise exhaust a generator and the
    # division pass below would silently see nothing.
    weights = list(probs)
    total = sum(weights)
    if weights and total == 0:
        raise ZeroDivisionError(
            "cannot normalize: the probabilities sum to zero "
            "(is the data inconsistent with every hypothesis?)"
        )
    return [weight / total for weight in weights]

# sum every probability in a list of probs, e.g. [0.2, 0.3, 0.4, 0.5] 
# for every probability in that list, divide it by the calculated sum
# e.g. the first entry becomes 0.2 / (0.2 + 0.3 + 0.4 + 0.5) = 0.2 / 1.4 = 0.142857...

In [7]:
# learners consider hypotheses of word meanings

class Longhair:
    """A longhair cat with a name and a fixed personality."""

    def __init__(self, name):
        # Name is supplied by the caller; the personality is the same for all longhairs.
        self.name, self.personality = name, 'fanciful'

Hiccup = Longhair('Hiccup')

cat_hypothesis_space = [{set[Shorthair]}, {set[Shorthair], set[Longhair]}]
# the word cat can refer to the set of Shorthairs, OR the set of shorthairs and the set of longhairs, that includes Hiccup

In [8]:
# Every run of 1 to 6 consecutive entities drawn from 0..10, listed with the
# shortest runs first and, within one length, by increasing starting entity.
all_hypotheses = [
    set(range(start, start + size))
    for size in range(1, 7)          # extension sizes 1..6
    for start in range(0, 12 - size)  # last run of each size ends at entity 10
]

all_hypotheses
# assumptions:
# 1. there are 11 entities in this world, numbered 0 - 10
# 2. a word refers to at least one of those entities, and up to (inclusive) 6 entities
# 3. hypotheses only include entities that are clustered (contiguous); in other words, 0 is clustered with entities 1, 2, 3, 4, 5, so {0, 1, 2} is a hypothesis but {0, 2} is not

[{0},
 {1},
 {2},
 {3},
 {4},
 {5},
 {6},
 {7},
 {8},
 {9},
 {10},
 {0, 1},
 {1, 2},
 {2, 3},
 {3, 4},
 {4, 5},
 {5, 6},
 {6, 7},
 {7, 8},
 {8, 9},
 {9, 10},
 {0, 1, 2},
 {1, 2, 3},
 {2, 3, 4},
 {3, 4, 5},
 {4, 5, 6},
 {5, 6, 7},
 {6, 7, 8},
 {7, 8, 9},
 {8, 9, 10},
 {0, 1, 2, 3},
 {1, 2, 3, 4},
 {2, 3, 4, 5},
 {3, 4, 5, 6},
 {4, 5, 6, 7},
 {5, 6, 7, 8},
 {6, 7, 8, 9},
 {7, 8, 9, 10},
 {0, 1, 2, 3, 4},
 {1, 2, 3, 4, 5},
 {2, 3, 4, 5, 6},
 {3, 4, 5, 6, 7},
 {4, 5, 6, 7, 8},
 {5, 6, 7, 8, 9},
 {6, 7, 8, 9, 10},
 {0, 1, 2, 3, 4, 5},
 {1, 2, 3, 4, 5, 6},
 {2, 3, 4, 5, 6, 7},
 {3, 4, 5, 6, 7, 8},
 {4, 5, 6, 7, 8, 9},
 {5, 6, 7, 8, 9, 10}]

In [9]:
def calculate_prior(possible_hypotheses):
    """Build a uniform prior over the given hypotheses.

    Returns a list with one entry per hypothesis, each equal to
    ``1 / len(possible_hypotheses)``; an empty input gives an empty list.
    """
    return [1 / len(possible_hypotheses) for _ in possible_hypotheses]

In [10]:
calculate_prior(all_hypotheses)

[0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549,
 0.0196078431372549]

In [11]:
# Example observations: entity 7 labelled four times, entity 8 once.
# NOTE(review): this name shadows Python's built-in input() for the rest of the
# session, and no later cell visible here reads it -- consider renaming it
# (e.g. observed_data) after confirming nothing else depends on the name.
input = [7, 7, 7, 7, 8]

In [12]:
def likelihood(data, hypothesis):
    """Probability of observing ``data`` if ``hypothesis`` is the word's meaning.

    Each observation that belongs to the hypothesis contributes
    ``1 / len(hypothesis)``; an observation outside the hypothesis contributes
    0. The result is the product of those per-observation values (numpy's
    ``prod``), which is 1.0 for empty ``data``.
    """
    per_item = [
        1 / len(hypothesis) if observation in hypothesis else 0
        for observation in data
    ]
    return prod(per_item)

In [13]:
print(likelihood([0, 6, 7, 8], {7, 8, 9}))
print(likelihood([6, 7], {7, 8, 9}))
print(likelihood([6, 7], {7, 8, 9}))
likelihood([6, 7], {6, 7})

0.0
0.0
0.0


0.25

In [14]:
def posterior(data, possible_hypotheses, prior):
    """Posterior probability of every hypothesis given the observed data.

    ``prior[i]`` is taken as the prior of ``possible_hypotheses[i]``. For each
    hypothesis the prior is multiplied by ``likelihood(data, hypothesis)``, and
    the resulting list is passed through ``normalize_probabilities``.
    """
    unnormalized = []
    for index, hypothesis in enumerate(possible_hypotheses):
        # Numerator of Bayes' rule: P(h) * P(data | h).
        unnormalized.append(prior[index] * likelihood(data, hypothesis))
    return normalize_probabilities(unnormalized)

In [15]:
my_prior = calculate_prior(all_hypotheses)
posterior([0, 0, 1], all_hypotheses, my_prior)

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.6568863586599518,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.19463299515850427,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.08211079483249398,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.042040726954236926,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.024329124394813034,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0]

Questions:

1. How does the amount of data influence the posterior distribution? For instance, is the posterior the same after seeing the data [0, 0, 1] and the data [0, 0, 1, 0, 0, 1]? 

In [16]:
my_posterior1 = posterior([0, 0, 1], all_hypotheses, my_prior)
# One line per hypothesis: the hypothesis followed by its posterior probability.
for hypothesis, probability in zip(all_hypotheses, my_posterior1):
    print(hypothesis, probability)

{0} 0.0
{1} 0.0
{2} 0.0
{3} 0.0
{4} 0.0
{5} 0.0
{6} 0.0
{7} 0.0
{8} 0.0
{9} 0.0
{10} 0.0
{0, 1} 0.6568863586599518
{1, 2} 0.0
{2, 3} 0.0
{3, 4} 0.0
{4, 5} 0.0
{5, 6} 0.0
{6, 7} 0.0
{8, 7} 0.0
{8, 9} 0.0
{9, 10} 0.0
{0, 1, 2} 0.19463299515850427
{1, 2, 3} 0.0
{2, 3, 4} 0.0
{3, 4, 5} 0.0
{4, 5, 6} 0.0
{5, 6, 7} 0.0
{8, 6, 7} 0.0
{8, 9, 7} 0.0
{8, 9, 10} 0.0
{0, 1, 2, 3} 0.08211079483249398
{1, 2, 3, 4} 0.0
{2, 3, 4, 5} 0.0
{3, 4, 5, 6} 0.0
{4, 5, 6, 7} 0.0
{8, 5, 6, 7} 0.0
{8, 9, 6, 7} 0.0
{8, 9, 10, 7} 0.0
{0, 1, 2, 3, 4} 0.042040726954236926
{1, 2, 3, 4, 5} 0.0
{2, 3, 4, 5, 6} 0.0
{3, 4, 5, 6, 7} 0.0
{4, 5, 6, 7, 8} 0.0
{5, 6, 7, 8, 9} 0.0
{6, 7, 8, 9, 10} 0.0
{0, 1, 2, 3, 4, 5} 0.024329124394813034
{1, 2, 3, 4, 5, 6} 0.0
{2, 3, 4, 5, 6, 7} 0.0
{3, 4, 5, 6, 7, 8} 0.0
{4, 5, 6, 7, 8, 9} 0.0
{5, 6, 7, 8, 9, 10} 0.0


In [17]:
my_posterior2 = posterior([0, 0, 1, 0, 0, 1], all_hypotheses, my_prior)
# Same observations as before, seen twice: print each hypothesis with its posterior.
for hypothesis, probability in zip(all_hypotheses, my_posterior2):
    print(hypothesis, probability)

{0} 0.0
{1} 0.0
{2} 0.0
{3} 0.0
{4} 0.0
{5} 0.0
{6} 0.0
{7} 0.0
{8} 0.0
{9} 0.0
{10} 0.0
{0, 1} 0.9018073901245205
{1, 2} 0.0
{2, 3} 0.0
{3, 4} 0.0
{4, 5} 0.0
{5, 6} 0.0
{6, 7} 0.0
{8, 7} 0.0
{8, 9} 0.0
{9, 10} 0.0
{0, 1, 2} 0.07917101916045172
{1, 2, 3} 0.0
{2, 3, 4} 0.0
{3, 4, 5} 0.0
{4, 5, 6} 0.0
{5, 6, 7} 0.0
{8, 6, 7} 0.0
{8, 9, 7} 0.0
{8, 9, 10} 0.0
{0, 1, 2, 3} 0.014090740470695633
{1, 2, 3, 4} 0.0
{2, 3, 4, 5} 0.0
{3, 4, 5, 6} 0.0
{4, 5, 6, 7} 0.0
{8, 5, 6, 7} 0.0
{8, 9, 6, 7} 0.0
{8, 9, 10, 7} 0.0
{0, 1, 2, 3, 4} 0.0036938030699500374
{1, 2, 3, 4, 5} 0.0
{2, 3, 4, 5, 6} 0.0
{3, 4, 5, 6, 7} 0.0
{4, 5, 6, 7, 8} 0.0
{5, 6, 7, 8, 9} 0.0
{6, 7, 8, 9, 10} 0.0
{0, 1, 2, 3, 4, 5} 0.001237047174382058
{1, 2, 3, 4, 5, 6} 0.0
{2, 3, 4, 5, 6, 7} 0.0
{3, 4, 5, 6, 7, 8} 0.0
{4, 5, 6, 7, 8, 9} 0.0
{5, 6, 7, 8, 9, 10} 0.0


After seeing the data, [0, 0, 1], the hypothesis {0, 1} has a posterior probability of 0.6568863586599518. After seeing the data, [0, 0, 1, 0, 0, 1], the hypothesis {0, 1} has a posterior probability of 0.9018073901245205.
The posterior probability of hypothesis {0, 1} increases after more information (i.e., data) is seen.
The posterior probability of hypothesis {0, 1, 2} decreases after more information: after [0, 0, 1] it is 0.19463299515850427, and after [0, 0, 1, 0, 0, 1] it is 0.07917101916045172.
In effect, the hypothesis that predicts fewer word meanings is preferred after more data.
This reflects the size principle: 

$p(X|h)=[\frac{1}{size(h)}]^n$

Hypotheses that predict the word has fewer meanings (i.e., that have smaller extensions), assign exponentially greater probability to the same data than do hypotheses that predict the word has many more meanings. 

2. When are more specific word meanings preferred? When are more general word meanings preferred?

The most specific word meanings that explain (or, predict) the observed data are always preferred (given our uniform prior): they have smaller extensions, and if the observed data all fall within such an extension, they are preferred over hypotheses whose extensions also contain entities that did not occur in the observed data.

3. This code calculates a probability distribution over possible hypotheses given some data. If you had to commit to a single hypothesis, how would you choose one?

I would choose one that explains the data best. Given that I have no (known) preference for any of the possible hypotheses (in other words, equal priors assigned to each hypothesis), I would pick the hypothesis with the greatest likelihood: given the data, the hypothesis that best predicted the observed data. 

In this case, I pick the hypothesis with the greatest posterior probability. I pick the MAP hypothesis. 

If I do not pick the MAP hypothesis, I could instead sample a hypothesis at random in proportion to its posterior probability, despite the fact that there is one specific hypothesis with the greatest posterior probability (the MAP hypothesis). I would then be more likely to select hypotheses that have greater posterior probabilities. In effect, I would more often select from hypotheses with posterior probabilities of 0.8, 0.9, etc., but I could occasionally select one with a low posterior probability.

4. Do we have any kind of innateness in our model? Are there word meanings that our model learner could never learn, no matter what kind of data we gave them?

If innateness is "what the learner brings to the task of learning word meanings," then our model does have innateness. 
1. The prior : our learner does not prefer any hypothesis in all_hypotheses; they all have equal probability.
2. The hypothesis space: there are hypotheses that we did not include, e.g. a) ones that predict the word refers to more than 6 entities (i.e., between 7 and 11), b) ones that predict the word refers to entities that are not clustered in the space of possible entities (e.g., {0, 10} is not a possible hypothesis). Given that we did not include these hypotheses, they can never be learned by the learner. 
3. The likelihood: we assumed that the learner knows what data for each hypothesis should look like. In other words, given some labelling behaviour, the learner considers how likely that labelling behaviour is under each hypothesis. 

5. [Harder] We are assuming that candidate word meanings are nice and neat: a word refers to a set of entities that are contiguous in some sense (as represented by consecutive integers: i.e. our hypothesis space includes {3,4,5} but not {3,5} as a candidate meaning). Calculate the posterior probability distribution using the standard hypothesis space and the data [3,5]. Which hypothesis has the highest posterior probability? Now add a "hypothesis with a hole in it", {3,5} to the hypothesis space and recalculate the posterior. Which hypothesis has the highest posterior probability now, and why? Is there a potential problem here if we are trying to model learning? How would we change the model to disfavour this kind of "hypothesis with a hole in it"?

In [18]:
my_posterior3 = posterior([3, 5], all_hypotheses, my_prior)
# Print each hypothesis next to its posterior for the data [3, 5].
for hypothesis, probability in zip(all_hypotheses, my_posterior3):
    print(hypothesis, probability)

{0} 0.0
{1} 0.0
{2} 0.0
{3} 0.0
{4} 0.0
{5} 0.0
{6} 0.0
{7} 0.0
{8} 0.0
{9} 0.0
{10} 0.0
{0, 1} 0.0
{1, 2} 0.0
{2, 3} 0.0
{3, 4} 0.0
{4, 5} 0.0
{5, 6} 0.0
{6, 7} 0.0
{8, 7} 0.0
{8, 9} 0.0
{9, 10} 0.0
{0, 1, 2} 0.0
{1, 2, 3} 0.0
{2, 3, 4} 0.0
{3, 4, 5} 0.23781212841854926
{4, 5, 6} 0.0
{5, 6, 7} 0.0
{8, 6, 7} 0.0
{8, 9, 7} 0.0
{8, 9, 10} 0.0
{0, 1, 2, 3} 0.0
{1, 2, 3, 4} 0.0
{2, 3, 4, 5} 0.13376932223543397
{3, 4, 5, 6} 0.13376932223543397
{4, 5, 6, 7} 0.0
{8, 5, 6, 7} 0.0
{8, 9, 6, 7} 0.0
{8, 9, 10, 7} 0.0
{0, 1, 2, 3, 4} 0.0
{1, 2, 3, 4, 5} 0.08561236623067776
{2, 3, 4, 5, 6} 0.08561236623067776
{3, 4, 5, 6, 7} 0.08561236623067776
{4, 5, 6, 7, 8} 0.0
{5, 6, 7, 8, 9} 0.0
{6, 7, 8, 9, 10} 0.0
{0, 1, 2, 3, 4, 5} 0.059453032104637316
{1, 2, 3, 4, 5, 6} 0.059453032104637316
{2, 3, 4, 5, 6, 7} 0.059453032104637316
{3, 4, 5, 6, 7, 8} 0.059453032104637316
{4, 5, 6, 7, 8, 9} 0.0
{5, 6, 7, 8, 9, 10} 0.0


In [19]:
# Extend the hypothesis space with a non-contiguous ("holed") meaning.
holed_hypothesis = {3, 5}
# Guarded so that re-running this cell does not append {3, 5} a second time,
# which would put a duplicate hypothesis into the space.
if holed_hypothesis not in all_hypotheses:
    all_hypotheses.append(holed_hypothesis)

# The uniform prior is recomputed because the space now holds one more hypothesis.
my_prior2 = calculate_prior(all_hypotheses)

In [20]:
my_posterior4 = posterior([3, 5], all_hypotheses, my_prior2)
# Print each hypothesis (now including {3, 5}) next to its posterior.
for hypothesis, probability in zip(all_hypotheses, my_posterior4):
    print(hypothesis, probability)

{0} 0.0
{1} 0.0
{2} 0.0
{3} 0.0
{4} 0.0
{5} 0.0
{6} 0.0
{7} 0.0
{8} 0.0
{9} 0.0
{10} 0.0
{0, 1} 0.0
{1, 2} 0.0
{2, 3} 0.0
{3, 4} 0.0
{4, 5} 0.0
{5, 6} 0.0
{6, 7} 0.0
{8, 7} 0.0
{8, 9} 0.0
{9, 10} 0.0
{0, 1, 2} 0.0
{1, 2, 3} 0.0
{2, 3, 4} 0.0
{3, 4, 5} 0.1549186676994578
{4, 5, 6} 0.0
{5, 6, 7} 0.0
{8, 6, 7} 0.0
{8, 9, 7} 0.0
{8, 9, 10} 0.0
{0, 1, 2, 3} 0.0
{1, 2, 3, 4} 0.0
{2, 3, 4, 5} 0.087141750580945
{3, 4, 5, 6} 0.087141750580945
{4, 5, 6, 7} 0.0
{8, 5, 6, 7} 0.0
{8, 9, 6, 7} 0.0
{8, 9, 10, 7} 0.0
{0, 1, 2, 3, 4} 0.0
{1, 2, 3, 4, 5} 0.05577072037180482
{2, 3, 4, 5, 6} 0.05577072037180482
{3, 4, 5, 6, 7} 0.05577072037180482
{4, 5, 6, 7, 8} 0.0
{5, 6, 7, 8, 9} 0.0
{6, 7, 8, 9, 10} 0.0
{0, 1, 2, 3, 4, 5} 0.03872966692486445
{1, 2, 3, 4, 5, 6} 0.03872966692486445
{2, 3, 4, 5, 6, 7} 0.03872966692486445
{3, 4, 5, 6, 7, 8} 0.03872966692486445
{4, 5, 6, 7, 8, 9} 0.0
{5, 6, 7, 8, 9, 10} 0.0
{3, 5} 0.34856700232378


In [21]:
hyp_1 = {1, 2}
hyp_2 = {1, 3}
hyps = [hyp_1, hyp_2]
hyp_3 = {2, 3}
hyps.append(hyp_3)

def calculate_prior2(possible_hypotheses, penalized_entity=1, penalty=0.2):
    """Prior that disfavours every hypothesis containing ``penalized_entity``.

    A hypothesis that contains the penalised entity receives ``penalty`` times
    the uniform share ``1 / len(possible_hypotheses)``. The probability mass
    left over is split equally among the remaining hypotheses.

    Args:
        possible_hypotheses: iterable of hypotheses (collections of entities).
        penalized_entity: entity whose presence marks a hypothesis as
            disfavoured. Defaults to 1.
        penalty: factor applied to the uniform share of a disfavoured
            hypothesis. Defaults to 0.2.

    Returns:
        A list of priors with one entry per hypothesis, in the same order as
        ``possible_hypotheses``, so ``prior[i]`` belongs to
        ``possible_hypotheses[i]`` whatever the order of the input. An empty
        input gives an empty list. If every hypothesis is disfavoured there is
        nothing to prefer, so a uniform prior is returned.
    """
    hypotheses = list(possible_hypotheses)
    if not hypotheses:
        return []

    is_penalized = [penalized_entity in h for h in hypotheses]
    favoured_count = is_penalized.count(False)
    if favoured_count == 0:
        # No hypothesis can absorb the leftover mass; fall back to uniform so
        # the result is still a proper distribution.
        return [1 / len(hypotheses)] * len(hypotheses)

    penalized_prior = (1 / len(hypotheses)) * penalty
    penalized_mass = sum(penalized_prior for flagged in is_penalized if flagged)
    favoured_prior = (1 - penalized_mass) / favoured_count

    # Build the result in input order so it lines up index-for-index with the
    # hypotheses it describes.
    return [penalized_prior if flagged else favoured_prior for flagged in is_penalized]

calculate_prior2(hyps)

[0.06666666666666667, 0.06666666666666667, 0.8666666666666667]

In [22]:
print(likelihood([1, 2], hyp_1))
print(likelihood([1, 3], hyp_2))
print(likelihood([2, 3], hyp_3))

print(likelihood([1, 2, 2], hyp_1))
print(likelihood([1, 3, 3], hyp_2))
print(likelihood([2, 3, 3], hyp_3))

for h in hyps:
    print(likelihood([1, 2], h))

print(likelihood([1, 2], hyp_1))
print(likelihood([1, 2], hyp_2))
print(likelihood([1, 2], hyp_3))


0.25
0.25
0.25
0.125
0.125
0.125
0.25
0.0
0.0
0.25
0.0
0.0


In [23]:
my_prior3 = calculate_prior2(hyps)
my_prior3

[0.06666666666666667, 0.06666666666666667, 0.8666666666666667]

In [24]:
# NOTE(review): this repeats the posterior() already defined in cell 14 with an
# identical body; the redefinition replaces that one without changing behaviour.
def posterior(data, possible_hypotheses, prior):
    """Posterior probability of every hypothesis given the observed data.

    ``prior[i]`` is taken as the prior of ``possible_hypotheses[i]``; each is
    multiplied by ``likelihood(data, hypothesis)`` and the products are
    rescaled with ``normalize_probabilities``.
    """
    scores = []
    for position, candidate in enumerate(possible_hypotheses):
        scores.append(prior[position] * likelihood(data, candidate))
    return normalize_probabilities(scores)

# Posterior after observing the single entity 2 under the non-uniform prior.
# (The earlier bare posterior(...) call was dropped: its result was discarded
# and the same value is computed here.)
my_posterior5 = posterior([2], hyps, my_prior3)
# Print each hypothesis with its posterior and the prior it started from.
for hypothesis, post, pri in zip(hyps, my_posterior5, my_prior3):
    print(hypothesis, post, pri)

{1, 2} 0.07142857142857142 0.06666666666666667
{1, 3} 0.0 0.06666666666666667
{2, 3} 0.9285714285714286 0.8666666666666667


6. [Harder] We are assuming (in the likelihood function) that all meanings of a word are equally likely to be encountered. What other kinds of assumptions might you make? How could you model those?

We could make the assumption that middle terms are prototype examples. Leftmost and rightmost entities are fringe cases; as we move toward the middle item in the list (from the left or the right), entities more closely resemble the prototypical example (or examples, if the number of items in the list is even).
For practice, we could make the inverse assumption: items furthest from the center (to the left or right of the middle item(s)) are prototype examples, and the middle item(s) are the most extreme fringe cases.

In [25]:
hyp_4 = {2, 3}
hyp_5 = {3, 4, 5}
hyp_6 = {2, 3, 4}

hyps2 = []
hyps2.append(hyp_4)
hyps2.append(hyp_5)
hyps2.append(hyp_6)
hyps2

[{2, 3}, {3, 4, 5}, {2, 3, 4}]

In [26]:
print(hyps2)

def likelihood2(data, hypothesis):
    """Likelihood in which a hypothesis' largest entity is three times as likely.

    Each observation in ``data`` scores ``1 / len(hypothesis)`` when it belongs
    to the hypothesis (tripled when it is the hypothesis' maximum) and 0
    otherwise. The scores are then divided by their sum over the observations
    and the product of the rescaled scores is returned.

    NOTE(review): the rescaling is done over the observed data rather than
    over the members of the hypothesis, so a single in-hypothesis observation
    always scores 1.0 whatever the hypothesis size -- confirm this is the
    intended model.

    Args:
        data: iterable of observed entities.
        hypothesis: collection of entities the word is hypothesised to cover.

    Returns:
        The product of the rescaled scores (1.0 for empty ``data``), or 0.0
        when none of the observations belongs to the hypothesis. That case
        used to divide by zero.
    """
    size = len(hypothesis)
    # max() of an empty hypothesis would raise; no observation can match it anyway.
    largest = max(hypothesis) if size else None

    scores = []
    for x in data:
        if x in hypothesis:
            score = 1 / size
            if x == largest:
                # the max term is 3x more likely than any other term
                score = (1 / size) * 3
        else:
            score = 0
        scores.append(score)

    total = sum(scores)
    if scores and total == 0:
        # Nothing observed is compatible with this hypothesis.
        return 0.0
    # divide by the sum of the scores so they add up to 1 across the data
    return prod([score / total for score in scores])
    
for h in hyps2:
    print(likelihood2([2, 3], h))  

[{2, 3}, {3, 4, 5}, {2, 3, 4}]
0.1875
0.0
0.25


In [27]:
print(hyps2)

def likelihood3(data, hypothesis):
    """Likelihood that boosts the hypothesis' largest entity only when it was observed.

    If ``max(hypothesis)`` occurs in ``data``: each observation scores
    ``1 / len(hypothesis)`` when it belongs to the hypothesis (tripled for the
    maximum) and 0 otherwise; the scores are divided by their sum over the
    observations and the product of the rescaled scores is returned.

    Otherwise: the plain likelihood is returned, i.e. the product of
    ``1 / len(hypothesis)`` for observations inside the hypothesis and 0 for
    those outside (1.0 for empty ``data``).

    NOTE(review): as in likelihood2, the rescaling is over the observed data
    rather than over the members of the hypothesis -- confirm this is intended.

    Raises:
        ValueError: if ``hypothesis`` is empty (``max`` of an empty collection).
    """
    size = len(hypothesis)
    largest = max(hypothesis)

    if largest not in data:
        return prod([1 / size if item in hypothesis else 0 for item in data])

    weighted = []
    for item in data:
        if item in hypothesis:
            score = 1 / size
            if item == largest:
                # the max term is 3x more likely than any other term
                score = score * 3
        else:
            score = 0
        weighted.append(score)

    # Normalise once, after all scores are known, instead of rebuilding the
    # normalised list on every loop iteration. The total is positive here
    # because the largest entity is among the observations.
    total = sum(weighted)
    return prod([score / total for score in weighted])

for h in hyps2:
    print(likelihood3([2, 3], h))  

[{2, 3}, {3, 4, 5}, {2, 3, 4}]
0.1875
0.0
0.1111111111111111


In [28]:
hyps3 = []

hyp_7 = {2, 3}
hyp_8 = {1, 2}
hyp_9 = {2, 3, 5}

hyps3.append(hyp_7)
hyps3.append(hyp_8)
hyps3.append(hyp_9)

print(hyps3)

for h in hyps2:
    print(likelihood2([2, 3], h))

for h in hyps2:
    print(likelihood3([2, 3], h))

for h in hyps3:
    print(likelihood3([2, 3], h))

[{2, 3}, {1, 2}, {2, 3, 5}]
0.1875
0.0
0.25
0.1875
0.0
0.1111111111111111
0.1875
0.0
0.1111111111111111


EXTRA BAYES PRACTICE

In [50]:
# Priors

def practice_prior(possible_hypotheses):
    """Uniform prior: every hypothesis gets ``1 / len(possible_hypotheses)``.

    The returned list has one entry per hypothesis; it is empty for an empty
    input.
    """
    return [1 / len(possible_hypotheses) for _hypothesis in possible_hypotheses]

practice_hyps = [{0,1,2}, {0,1}]

my_prior4 = practice_prior(practice_hyps)
print(my_prior4)

# Likelihoods

def practice_likelihood(data, hypothesis):
    """Product over the observations of their probability under ``hypothesis``.

    An observation inside the hypothesis has probability
    ``1 / len(hypothesis)``; one outside it has probability 0. Empty ``data``
    gives 1.0 (numpy's ``prod`` of an empty list).
    """
    def chance_of(observation):
        # Probability of drawing this one observation from the hypothesis.
        if observation in hypothesis:
            return 1 / len(hypothesis)
        return 0

    return prod([chance_of(observation) for observation in data])

var = []
for h in practice_hyps:
    var.append(practice_likelihood([0], h))
print(var)

# Posteriors

# NOTE(review): third definition of posterior() in this notebook, with the same
# body as the earlier ones. It calls likelihood() from cell 12, not the
# practice_likelihood() defined just above; the two compute the same value.
def posterior(data, possible_hypotheses, prior):
    """Posterior probability of every hypothesis given the observed data.

    Multiplies ``prior[i]`` by ``likelihood(data, possible_hypotheses[i])`` for
    each hypothesis and rescales the products with ``normalize_probabilities``.
    """
    products = []
    for slot, meaning in enumerate(possible_hypotheses):
        products.append(prior[slot] * likelihood(data, meaning))
    return normalize_probabilities(products)

my_posterior6 = posterior([0], practice_hyps, my_prior4)
for i in range(len(practice_hyps)):
    print(practice_hyps[i], my_prior4[i], var[i], my_posterior6[i])

[0.5, 0.5]
[0.3333333333333333, 0.5]
{0, 1, 2} 0.5 0.3333333333333333 0.4
{0, 1} 0.5 0.5 0.6000000000000001
