[Link to videos and exercises](https://www.khanacademy.org/math/statistics-probability/random-variables-stats-library)

In [1]:
# a cell to import modules and define helper functions
import math
import numpy
import scipy.stats as ss

def calculate_norm_cdf(lower_bound, upper_bound, mu, sigma):
    """Return P(lower_bound < X < upper_bound) for a normal variable X.

    Parameters
    ----------
    lower_bound : float or None
        Lower end of the interval; ``None`` means no lower limit (-infinity).
    upper_bound : float or None
        Upper end of the interval; ``None`` means no upper limit (+infinity).
    mu : float
        Mean of the normal distribution (passed to scipy as ``loc``).
    sigma : float
        Standard deviation of the normal distribution (passed as ``scale``).

    Returns
    -------
    float
        CDF evaluated at ``upper_bound`` minus CDF evaluated at ``lower_bound``.
    """
    # Explicit None checks so that a legitimate bound of 0 is never mistaken
    # for "no limit" (which is what `bound or inf` would do).
    if lower_bound is None:
        lower_bound = -math.inf
    if upper_bound is None:
        upper_bound = math.inf

    cdf_lower = ss.norm.cdf(lower_bound, mu, sigma)
    cdf_upper = ss.norm.cdf(upper_bound, mu, sigma)
    return cdf_upper - cdf_lower

def calculate_combinations(n, k):
    """Return the binomial coefficient "n choose k".

    Computed as n! / ((n - k)! * k!) using integer arithmetic, so the result
    is an exact int.
    """
    orderings_of_n = math.factorial(n)
    overcount = math.factorial(n - k) * math.factorial(k)
    # Integer division is exact here: the denominator always divides n!.
    return orderings_of_n // overcount

# 1. Discrete random variables

$\sigma_X = \sqrt{\sum{(x_i - \mu_X)^2 p_i}}$

where:

$\sigma_X$ is the standard deviation of the discrete random variable $X$

$x_i$ is each specific outcome

$\mu_X$ is the mean of X

$p_i$ is the probability of each specific outcome

![](img/random_variables_p1.png)

In [None]:
# Inputs: the mean of X, each possible outcome, and the outcome's probability.
MU = 100
VALUES = [200, -99800]
PROBALITIES = [0.999, 0.001]

# Variance is the probability-weighted sum of squared deviations from the mean.
variance = sum(
    (outcome - MU)**2 * weight
    for outcome, weight in zip(VALUES, PROBALITIES)
)

# Standard deviation is the square root of the variance.
sigma = math.sqrt(variance)

print("Standard deviation is %.2f" % sigma)

# 2. Continuous random variables

![](img/random_variables_p2.png)

In [None]:
# Parameters of the normal distribution.
MU = 1497
SIGMA = 322

# set None if there is no limit
LOWER_BOUND = 1497
UPPER_BOUND = 1819

# Resolve "no limit" with an explicit None check: `LOWER_BOUND or -math.inf`
# would also discard a legitimate bound of 0.
lower_bound = -math.inf if LOWER_BOUND is None else LOWER_BOUND
upper_bound = math.inf if UPPER_BOUND is None else UPPER_BOUND

p = calculate_norm_cdf(lower_bound, upper_bound, MU, SIGMA)

# Print the resolved bounds: formatting None with %.2f raises TypeError,
# whereas +/-inf formats cleanly as "inf" / "-inf".
print("Probability of %.2f < X < %.2f is %.2f"
    % (lower_bound, upper_bound, p))

# 3. Combining normal random variables

![](img/random_variables_p3.png)

In [None]:
# Means and standard deviations of the normal variables being combined.
MUS = [370, 170]
SIGMAS = [24, 7]

# set None if there is no limit
LOWER_BOUND = None
UPPER_BOUND = 575

# True for summation, False for subtraction (first variable minus second)
IS_SUM = True

# Variances add for both sums and differences (assuming the variables are
# independent), so the combined sigma is the same in either mode.
sigma = math.sqrt(sum(s**2 for s in SIGMAS))

if IS_SUM:
    mu = sum(MUS)
else:
    # Keep the sign: the mean of X - Y is mu_X - mu_Y. Taking abs() would
    # silently switch to the distribution of Y - X when mu_X < mu_Y.
    mu = MUS[0] - MUS[1]

# Explicit None checks so a bound of 0 (e.g. P(X - Y > 0)) is not mistaken
# for "no limit", which is what `LOWER_BOUND or -math.inf` would do.
lower_bound = -math.inf if LOWER_BOUND is None else LOWER_BOUND
upper_bound = math.inf if UPPER_BOUND is None else UPPER_BOUND

p = calculate_norm_cdf(lower_bound, upper_bound, mu, sigma)
print("Probability is %.2f" % p)

# 4. Binomial random variables

Conditions for binomial random variables:
* the outcome of each trial can be classified as either success or failure
* each trial is independent
* there is a fixed number of trials
* the probability **p** of success on each trial remains constant

Calculating binomial probability:

$P(X = k) = \binom{n}{k} \cdot p_{success}^k \cdot p_{failure}^{n-k}$

where

$\binom{n}{k}$ is binomial coefficient (n choose k)

$p_{success}^k$ is the probability of success on each independent trial, raised to the $k^{th}$ power

$p_{failure}^{n-k}$ is the probability of failure on each independent trial, raised to the $(n-k)^{th}$ power

![](img/random_variables_p4.png)

In [None]:
# Number of trials and per-trial probability of success.
TOTAL_TRIALS = 5
SUCCESS_RATE = 0.6

# set None if there is no limit; set both to the same value for "exactly k"
TARGET_LOWER_BOUND = 3
TARGET_UPPER_BOUND = 3

# Use explicit None checks: truthiness tests would treat a bound of 0 as
# "not set", and `x or x != 0` is True for None.
has_lower = TARGET_LOWER_BOUND is not None
has_upper = TARGET_UPPER_BOUND is not None

if has_lower and has_upper and TARGET_LOWER_BOUND == TARGET_UPPER_BOUND:
    # P(X = k)
    probability = ss.binom.pmf(TARGET_LOWER_BOUND, TOTAL_TRIALS, SUCCESS_RATE)
    print("Probability that we have exactly %d sucesses is %.2f" % (TARGET_LOWER_BOUND, probability))
elif has_lower and has_upper:
    # binom.cdf(k, n, p) is P(X <= k), so
    # P(lower <= X <= upper) = P(X <= upper) - P(X <= lower - 1)
    probability = (ss.binom.cdf(TARGET_UPPER_BOUND, TOTAL_TRIALS, SUCCESS_RATE)
                   - ss.binom.cdf(TARGET_LOWER_BOUND-1, TOTAL_TRIALS, SUCCESS_RATE))
    print("Probability that the number of successes is between %d and %d is %.2f"
          % (TARGET_LOWER_BOUND, TARGET_UPPER_BOUND, probability))
elif has_lower:
    # P(X >= lower) = 1 - P(X <= lower - 1)
    cumulative_probability = ss.binom.cdf(TARGET_LOWER_BOUND-1, TOTAL_TRIALS, SUCCESS_RATE)
    print("Probability that we have >= %d sucesses is %.2f" % (TARGET_LOWER_BOUND, 1- cumulative_probability))
elif has_upper:
    # P(X <= upper)
    cumulative_probability = ss.binom.cdf(TARGET_UPPER_BOUND, TOTAL_TRIALS, SUCCESS_RATE)
    print("Probability that we have <= %d sucesses is %.2f" % (TARGET_UPPER_BOUND, cumulative_probability))
else:
    raise ValueError("Set TARGET_LOWER_BOUND and/or TARGET_UPPER_BOUND")

# 5. Binomial mean and standard deviation

![](img/random_variables_p5.png)

In [None]:
# Number of trials (n) and per-trial probability of success (p).
TOTAL_TRIALS = 15
SUCCESS_RATE = 0.3

# Mean of a binomial variable: n * p
mu = TOTAL_TRIALS * SUCCESS_RATE

# Variance of a binomial variable: n * p * (1 - p); sigma is its square root.
failure_rate = 1-SUCCESS_RATE
variance = mu * failure_rate
sigma = math.sqrt(variance)

print("Mean is: %.1f" % mu)
print("Standard deviation is: %.1f" % sigma)

# 6. Geometric random variables

![](img/random_p3.png)

In [None]:
# A geometric random variable X is the trial number on which the first
# success occurs (scipy's geom is supported on k = 1, 2, ...), so the
# messages below talk about trials rather than a count of successes.
SUCCESS_RATE = 0.6
TARGET_NUMBER = 2

# P(X = k): first success happens exactly on trial k
probability = ss.geom.pmf(TARGET_NUMBER, SUCCESS_RATE)
print("Probability that the first success occurs on trial %d is %.2f" % (TARGET_NUMBER, probability))

TARGET_LESS_OR_EQUAL_NUMBER = 2

# P(X <= k): first success happens on or before trial k.
# For a discrete variable P(X < k + 1) is the same as P(X <= k).
cumulative_probability = ss.geom.cdf(TARGET_LESS_OR_EQUAL_NUMBER, SUCCESS_RATE)
print("Probability that the first success occurs within the first %d trials is %.2f" % (TARGET_LESS_OR_EQUAL_NUMBER, cumulative_probability))
# P(X > k) is the complement: the first k trials are all failures
print("Probability that the first success takes more than %d trials is %.2f" % (TARGET_LESS_OR_EQUAL_NUMBER, 1-cumulative_probability))