Normal distribution percentages:
    68.27% - share of values within 1 standard deviation (std) of the mean
    95.45% - within 2 stds
    99.73% - within 3 stds

mean = expected value = average

mean = sum(relative frequency * value)

relative frequency = number of units with a given value / total number of units


# Discrete Random Variables

Problem 1:
![Discrete Random Variables: Problem 1](img/random_p1.png)

In [1]:
# Solution 1:
# derive the standard deviation of a discrete random variable from its mean,
# its possible values and the probability of each value.

import math

MU = 100
values = [200, -99800]
probabilities = [0.999, 0.001]

# The variance is the probability-weighted sum of squared deviations:
# 1. measure how far each value lies from the mean
# 2. square that distance
# 3. weight it by the probability of the value and add everything up

squared_deviations = [(value - MU) ** 2 for value in values]
variance = sum(
    squared * weight
    for squared, weight in zip(squared_deviations, probabilities)
)

# the standard deviation is the square root of the variance
sigma = math.sqrt(variance)

print("Variance is %.2f " % variance)
print("Standard deviation is %.2f" % sigma)

Variance is 9990000.00 
Standard deviation is 3160.70


# Normal distribution

In [24]:
# Normal approximation for a sample proportion: given the population
# proportion and the sample size, estimate the probability that the
# proportion observed in the sample falls below an upper bound.

import math
import scipy.stats as ss

SAMPLE_SIZE = 400
MU_SAMPLE_UPPER_BOUND = 0.47
MU_POPULATION = 0.43
# backward-compatible alias for the original, misspelled constant name
MU_POLPULATION = MU_POPULATION

# standard deviation of the sample proportion: sqrt(p * (1 - p) / n)
sigma = math.sqrt(MU_POPULATION*(1-MU_POPULATION)/SAMPLE_SIZE)

# P(sample proportion < upper bound) under a normal distribution centred on
# the population proportion with the standard deviation computed above
cumulative_probability = ss.norm.cdf(MU_SAMPLE_UPPER_BOUND, MU_POPULATION, sigma)
print(
    "Probability that sample probability is < %.2f for sample of %d, "
    "when probability of population is %.2f: %.2f"
    % (MU_SAMPLE_UPPER_BOUND, SAMPLE_SIZE, MU_POPULATION, cumulative_probability)
)

Probability that sample probability is < 0.47 for sample of 400, when probability of population is 0.43: 0.95


# Binomial distribution

Problem 1:
![Binomial Distribution: Problem 1](img/random_p2.png)

In [2]:
# This snippet covers both kinds of binomial probability:
# - the probability of one specific number of successes (pmf), and
# - the cumulative probability of a range of outcomes (cdf), as in the example.

import scipy.stats as ss

TOTAL_TRIALS = 3
SUCCESS_RATE = 0.9

# the binomial distribution of this experiment with its parameters fixed once
trials_distribution = ss.binom(TOTAL_TRIALS, SUCCESS_RATE)

# example 1: exact value (binompdf) -> P(X == k)

TARGET_SUCCESSES_NUMBER = 2

probability = trials_distribution.pmf(TARGET_SUCCESSES_NUMBER)
print("Probability that we have exactly %d sucesses is %.2f" % (TARGET_SUCCESSES_NUMBER, probability))

# example 2: range (binomcdf) -> P(X <= k)

TARGET_LESS_OR_EQUAL_NUMBER = 1

cumulative_probability = trials_distribution.cdf(TARGET_LESS_OR_EQUAL_NUMBER)
print("Probability that we have <= %d sucesses is %.2f" % (TARGET_LESS_OR_EQUAL_NUMBER, cumulative_probability))
# this is the solution for the problem above: for a discrete variable
# P(X >= 2) is the same as P(X > 1), i.e. the complement 1 - P(X <= 1)
print("Probability that we have > %d sucesses is %.2f" % (TARGET_LESS_OR_EQUAL_NUMBER, 1-cumulative_probability))

Probability that we have exactly 2 sucesses is 0.24
Probability that we have <= 1 sucesses is 0.03
Probability that we have > 1 sucesses is 0.97


Problem 2:
![Binomial Distribution: Problem 2](img/random_p4.png)

In [3]:
# mean and standard deviation of a binomial random variable

import math

sample = 15
success_rate = 0.3

# expected number of successes: n * p
mean = sample * success_rate
print(mean)

# variance is n * p * (1 - p), i.e. the mean scaled by the failure rate;
# the standard deviation is its square root
failure_rate = 1 - success_rate
variance = mean * failure_rate
std = math.sqrt(variance)
print(std)

4.5
1.7748239349298849


# Geometric distribution

Problem 1:
![Geometric Distribution: Problem 1](img/random_p3.png)

In [4]:
# geometric random variable: the number of the trial on which the first
# success occurs, when every trial succeeds with probability SUCCESS_RATE.
# Note that the variable counts trials, not successes.

import scipy.stats as ss

SUCCESS_RATE = 0.6
TARGET_NUMBER = 2

# P(X == k): k - 1 failures followed by the first success on trial k
probability = ss.geom.pmf(TARGET_NUMBER, SUCCESS_RATE)
print("Probability that the first success occurs on exactly trial %d is %.2f" % (TARGET_NUMBER, probability))

TARGET_LESS_OR_EQUAL_NUMBER = 2

# P(X <= k): the first success occurs on or before trial k.
# per the original note this is the solution for the problem above:
# P(X < 3) is the same as P(X <= 2) for a discrete variable
cumulative_probability = ss.geom.cdf(TARGET_LESS_OR_EQUAL_NUMBER, SUCCESS_RATE)
print("Probability that the first success occurs within <= %d trials is %.2f" % (TARGET_LESS_OR_EQUAL_NUMBER, cumulative_probability))
# complement: the first k trials are all failures
print("Probability that the first success takes > %d trials is %.2f" % (TARGET_LESS_OR_EQUAL_NUMBER, 1-cumulative_probability))


Probability that we have exactly 2 sucesses is 0.24
Probability that we have <= 2 sucesses is 0.84
Probability that we have > 2 sucesses is 0.16


## Snippets

In [5]:
# inspired by theoretical versus experimental probability:
# https://www.khanacademy.org/math/statistics-probability/probability-library/randomness-probability-and-simulation/v/experimental-versus-theoretical-probability-simulation

import random

# number of dice rolled (and summed) in a single game
THROWS_NUMBER = 3
# minimum sum of the rolls for a game to count as won
THRESHOLD = 10
# number of games to simulate
TRIALS_NUMBER = 100

def is_winner_trial_game(throws_number, threshold):
    """Play one game and report whether it was won.

    Rolls a six-sided die `throws_number` times, adds up the rolls and
    compares the total with `threshold`.

    Args:
        throws_number: how many times the die is rolled.
        threshold: smallest total that counts as a win.

    Returns:
        True when the total of the rolls is at least `threshold`,
        otherwise False.
    """
    total = 0
    for _ in range(throws_number):
        total += random.randint(1, 6)
    return total >= threshold

def generate_games(trials, throws_number, threshold):
    """Simulate `trials` games and print the fraction of them that was won.

    Each game is played by `is_winner_trial_game(throws_number, threshold)`.
    The result is printed, not returned.

    Args:
        trials: number of games to play; 0 raises ZeroDivisionError because
            the fraction is computed as wins / trials.
        throws_number: dice rolled per game.
        threshold: smallest total of the rolls that counts as a win.
    """
    outcomes = (
        is_winner_trial_game(throws_number, threshold)
        for _ in range(trials)
    )
    games_won = sum(1 for won in outcomes if won)

    print(games_won/trials)

# run the simulation; no random seed is set in this cell, so unless one is
# set elsewhere the printed fraction is expected to differ between runs
generate_games(TRIALS_NUMBER, THROWS_NUMBER, THRESHOLD)



0.55


In [6]:
# n choose k
import math

def calculate_combinations(n, k):
    """Return "n choose k": the number of ways to pick k items out of n,
    without repetition and without regard to order.

    Args:
        n: total number of items (non-negative integer).
        k: number of items picked (non-negative integer).

    Returns:
        n! / (k! * (n - k)!) when k <= n, and 0 when k > n.

    Raises:
        ValueError: if n or k is negative.
    """
    if n < 0 or k < 0:
        raise ValueError("n and k must be non-negative")
    if k > n:
        # more items requested than available: no such selection exists
        # (previously this failed inside factorial() with a negative argument)
        return 0
    # another way is to take the number of permutations and divide it by factorial of k
    return math.factorial(n) // (math.factorial(n-k) * math.factorial(k))

calculate_combinations(3,0)

1

In [7]:
# from Peter Norvig on probabilities:
# https://nbviewer.jupyter.org/url/norvig.com/ipython/Probability.ipynb

import fractions

def P(event, space):
    """The probability of an event, given the sample space of outcomes.

    Both arguments are sets. Only the outcomes of `event` that also belong
    to `space` are counted, and the result is an exact fraction of the size
    of `space`. An empty `space` raises ZeroDivisionError.
    """
    favorable_outcomes = event & space
    return fractions.Fraction(len(favorable_outcomes), len(space))

# for intersect (&) to work we need to use set, not list
space = {1,2,3,4,5,6}
event = {2,4,6}

P(event, space)

Fraction(1, 2)