In [224]:
import pandas as pd
import numpy as np
import scipy as sp
import functools
import random

# Numeric encoding of a single flip outcome. Using 1 for tails and 0 for heads
# means a list of flips can be summed or averaged to count tails.
was_tail = 1  # tails (originally 'T')
was_head = 0  # heads (originally 'H')

def generate_coin_plip(p_tail):
    """Simulate a single flip of a coin that lands tails with probability ``p_tail``.

    One uniform number is drawn with ``random.random()``; the flip counts as
    tails when that draw is strictly below ``p_tail``.

    Returns:
        ``was_tail`` or ``was_head``.
    """
    draw = random.random()
    if draw < p_tail:
        return was_tail
    return was_head

def generage_outcome(p,coinflips):
    """Flip the coin ``coinflips`` times and collect the results in order.

    Args:
        p: probability of tails, forwarded to ``generate_coin_plip``.
        coinflips: number of flips to simulate; zero or a negative value
            yields an empty list.

    Returns:
        A list with one ``generate_coin_plip(p)`` result per flip.
    """
    flips = []
    for _ in range(coinflips):
        flips.append(generate_coin_plip(p))
    return flips


In [239]:
# 1. Bernoulli distribution
# Draw the same number of single flips from the hand-rolled coin simulator and
# from scipy's Bernoulli sampler, then compare sample mean and variance.
p=0.5  # probability of tails
outcome_range=1000  # number of flips in each sample

outcome1 = generage_outcome(p, outcome_range)
bernoulli_test = sp.stats.bernoulli.rvs(p,size=outcome_range)

# First line: simulated coin; second line: scipy reference sample.
# For Bernoulli(p) the theoretical mean is p and the variance is p*(1-p).
print("mean:{:.2}, variance:{:.2}".format(np.mean(outcome1), np.var(outcome1)))
print("mean:{:.2}, variance:{:.2}".format(np.mean(bernoulli_test), np.var(bernoulli_test)))

mean:0.49, variance:0.25
mean:0.51, variance:0.25


In [274]:
# Binomial distribution
def count_outcomes_in_single_event(outcome):
    """Tally one sequence of flips.

    Args:
        outcome: a sequence supporting ``.count`` (e.g. the list returned by
            ``generage_outcome``).

    Returns:
        A ``(tails, heads)`` tuple: how many entries equal ``was_tail`` and
        how many equal ``was_head``.
    """
    tails = outcome.count(was_tail)
    heads = outcome.count(was_head)
    return (tails, heads)

# Simulate `number_of_outcomes` experiments of `outcome_range` flips each and
# compare the tails-per-experiment counts with scipy's binomial sampler.
number_of_outcomes=10000
outcome_range=100
p=0.5
outcomes = [generage_outcome(p,outcome_range) for i in range(0,number_of_outcomes)]
# The counting function is passed to map directly; no lambda wrapper is needed.
m = map(count_outcomes_in_single_event, outcomes)
d = pd.DataFrame(list(m), columns=['tails','heads'])

binom_test = sp.stats.binom.rvs(n=outcome_range, p=p,size=number_of_outcomes)
# First line: simulated coin; second line: scipy reference sample.
print("mean:{:.3}, variance:{:.3}".format(np.mean(d.tails), np.var(d.tails)))
print("mean:{:.3}, variance:{:.3}".format(np.mean(binom_test), np.var(binom_test)))

#CDF test.
#what is the probability of having "num_of_tails_up_to" or fewer tails?
num_of_tails_up_to = 48
# Empirical CDF: the share of experiments with at most `num_of_tails_up_to`
# tails. The mean of the boolean mask is that share, computed in one
# vectorised pass instead of one frame scan per possible tail count.
print("{:.2%}".format((d.tails <= num_of_tails_up_to).mean()))
# Exact binomial CDF for comparison.
print("{:.2%}".format(sp.stats.binom.cdf(num_of_tails_up_to,n=outcome_range, p=p)))

mean:50.0, variance:24.9
mean:49.9, variance:25.2
38.02%
38.22%


In [285]:
#Normal distribution.
#1. increase the number of outcomes
#2. Normalize results

import matplotlib.pyplot as plt

def from_normalized(normalized, new_mean, new_std):
    """Rescale standardized values to a new mean/std and round them to integers.

    Args:
        normalized: iterable of standardized values (zero mean, unit std).
        new_mean: mean of the target scale.
        new_std: standard deviation of the target scale.

    Returns:
        A list of ints, one per input value, computed as
        ``int(0.5 + x*new_std + new_mean)``.
    """
    # Iterate over the ``normalized`` argument itself rather than a
    # notebook-level global, so the function works for any input it is given.
    # Note: int() truncates toward zero, so the +0.5 offset rounds half-up only
    # when the rescaled value is non-negative.
    return [int(0.5 + x*new_std + new_mean) for x in normalized]


# Simulate many 100-flip experiments, standardize the tails counts, rescale
# them to (norm_mean, norm_std) and compare with a rounded normal sample.
number_of_outcomes=50000
outcome_range=100
p=0.5
outcomes = [generage_outcome(p,outcome_range) for i in range(0,number_of_outcomes)]
d = pd.DataFrame(list(map(count_outcomes_in_single_event, outcomes)), columns=['tails','heads'])

tails_mean = d.tails.mean()
tails_std = d.tails.std()

# z-scores of the tails counts
normalized_tails = [(t-tails_mean)/tails_std for t in d.tails]

# Target scale for the comparison
norm_mean = 1000
norm_std=18

# Reference sample: normal draws rounded to integers the same way as in from_normalized
norm_test = [int(x+0.5) for x in sp.stats.norm.rvs(loc=norm_mean,scale=norm_std,size=number_of_outcomes)]
coint_new = from_normalized(normalized_tails,norm_mean,norm_std)

# Fixed-point formatting: a bare "{:.3}" keeps only 3 significant digits and
# switches to exponent notation for values >= 1000 (e.g. "1e+03").
print("\"coin distribution\" - mean:{:.1f}, variance:{:.1f}".format(np.mean(coint_new), np.var(coint_new)))
print("normal distribution - mean:{:.1f}, variance:{:.1f}".format(np.mean(norm_test), np.var(norm_test)))

#CDF test.
#what is the probability of having "num_of_tails_up_to" or fewer tails?
num_of_tails_up_to = 480

# NOTE(review): the CDF check below is disabled. As written it would not work:
# coint_new is a plain list (so `coint_new == i` is a single bool, not a mask)
# and the binomial reference is not on the rescaled (norm_mean, norm_std) scale.
#print("{:.2%}".format(sum([coint_new[coint_new == i].shape[0]/d.shape[0] for i in range(0,num_of_tails_up_to+1)])))
#print("{:.2%}".format(sp.stats.binom.cdf(num_of_tails_up_to,n=outcome_range, p=p)))



"coin distribution" - mean:1e+03, variance:3.24e+02
normal distribution - mean:1e+03, variance:3.25e+02


In [328]:
#Poisson distribution

#Albinism is a rare genetic disease that affects one in 20000 people.
#What is the probability that exactly 2 of 1000 randomly chosen people will have albinism?

#On average we have 2.7 cases of the disease per year.
#What is the probability of 1 or fewer cases in the current year?
poisson_lambda=2.7
# Sample size for both numeric solutions. It is assigned before its first use
# so this cell does not depend on a value left over from an earlier cell.
number_of_outcomes=50000

#solution number 1: exact Poisson CDF
print("Poisson CDF solution {:.2%}".format(sp.stats.poisson.cdf(1,poisson_lambda)))
#solution number 2: empirical frequency from scipy's Poisson sampler
# Poisson draws are already integers, so no rounding is needed; tolist() yields plain Python ints.
norm_test = sp.stats.poisson.rvs(mu=poisson_lambda,size=number_of_outcomes).tolist()
print("Poisson numeric solution{:.2%}".format(sum(x<=1 for x in norm_test)/number_of_outcomes))

#"coin flip" solution: outcome_range flips with success probability p, so n*p = 100*0.027 = 2.7
outcome_range=100
p=0.027
# Tails are encoded as 1, so summing one experiment's flips counts its successes.
coin_outcomes = [sum(generage_outcome(p,outcome_range)) for i in range(0,number_of_outcomes)]
print("\"Coin flip\" numeric solution{:.2%}".format(sum(x<=1 for x in coin_outcomes)/number_of_outcomes))

#Can this be solved with the binomial distribution? Sure: Poisson is a good approximation of it with lambda = n*p

Poisson CDF solution 24.87%
Poisson numeric solution24.76%
"Coin flip" numeric solution24.39%
