[Link to videos and exercises](https://www.khanacademy.org/math/statistics-probability/random-variables-stats-library)

In [1]:
# a cell to import modules and define helper functions
import math
import numpy
import scipy.stats as ss

def calculate_norm_cdf(lower, upper, mu, sigma):
    """Print and return P(lower < X < upper) for a normal variable X.

    Args:
        lower: lower limit of the interval, or None for no lower limit.
        upper: upper limit of the interval, or None for no upper limit.
        mu: mean of the distribution.
        sigma: standard deviation of the distribution.

    Returns:
        The probability mass between the two limits, computed as
        cdf(upper) - cdf(lower).
    """
    # Only None means "unbounded". The previous isinstance(..., int) check
    # silently discarded float and numpy-integer limits.
    lower_bound = -math.inf if lower is None else lower
    upper_bound = math.inf if upper is None else upper

    cdf_lower = ss.norm.cdf(lower_bound, mu, sigma)
    cdf_upper = ss.norm.cdf(upper_bound, mu, sigma)
    interval = cdf_upper - cdf_lower

    print("Probability of %.2f < X < %.2f is %.2f" % (lower_bound, upper_bound, interval))

    return interval

def calculate_combinations(n, k):
    """Return the binomial coefficient "n choose k" as an integer."""
    # n! / ((n-k)! * k!), using integer division so the result stays an int
    numerator = math.factorial(n)
    denominator = math.factorial(n-k) * math.factorial(k)
    return numerator // denominator

# 1. Discrete random variables

$\sigma_X = \sqrt{\sum{(x_i - \mu_X)^2 p_i}}$

where:

$\sigma_X$ is the standard deviation of a discrete random variable X

$x_i$ is each specific outcome

$\mu_X$ is the mean of X

$p_i$ is the probability of each specific outcome

![](img/random_variables_p1.png)

In [4]:
MU = 100
VALUES = [200, -99800]
PROBALITIES = [0.999, 0.001]

# Variance is the probability-weighted sum of squared deviations from the mean
variance = sum((x - MU)**2 * p for x, p in zip(VALUES, PROBALITIES))

# Standard deviation is the square root of the variance
sigma = math.sqrt(variance)

print("Standard deviation is %.2f"% sigma)

Standard deviation is 3160.70


# 2. Continuous random variables

![](img/random_variables_p2.png)

In [5]:
# Mean and standard deviation of the normal distribution
MU = 1497
SIGMA = 322

# Limits of the interval; set a limit to None if that side is unbounded
LOWER_BOUND = 1497
UPPER_BOUND = 1819

p = calculate_norm_cdf(lower=LOWER_BOUND, upper=UPPER_BOUND, mu=MU, sigma=SIGMA)

Probability of 1497.00 < X < 1819.00 is 0.34


# 3. Combining normal random variables

![](img/random_variables_p3.png)

In [2]:
# Means and standard deviations of the normal variables being combined
# (the variance rule below assumes the variables are independent)
MUS = [370, 170]
SIGMAS = [24, 7]

# set None if there is no limit
LOWER_BOUND = None
UPPER_BOUND = 575

# True for summation, False for subtraction (first variable minus the others)
IS_SUM = True

# Variances add for both sums and differences of independent variables
sigma = math.sqrt(sum(s**2 for s in SIGMAS))

if IS_SUM:
    mu = sum(MUS)
else:
    # The mean of a difference is signed: taking abs() would silently swap
    # the operands whenever the first mean is the smaller one.
    mu = MUS[0] - sum(MUS[1:])

p = calculate_norm_cdf(LOWER_BOUND, UPPER_BOUND, mu, sigma)

Probability of -inf < X < 575.00 is 0.92


# 4. Binomial random variables

Conditions for binomial random variables:
* the outcome of each trial can be classified as either success or failure
* each trial is independent
* there is a fixed number of trials
* probability **p** of success on each trial remains constant

Calculating binomial probability:

$\binom{n}{k} \cdot p_{success}^k \cdot p_{failure}^{n-k}$

where

$\binom{n}{k}$ is the binomial coefficient (n choose k)

$p_{success}^k$ is the probability of success of each independent trial raised to the $k^{th}$ power

$p_{failure}^{n-k}$ is the probability of failure of each independent trial raised to the $(n-k)^{th}$ power

![](img/random_variables_p4.png)

In [7]:
# Number of trials and per-trial success probability of the binomial variable
TOTAL_TRIALS = 5
SUCCESS_RATE = 0.6

# Inclusive limits on the number of successes; None leaves that side unbounded.
# Equal limits select the exact-count (pmf) branch further down.
TARGET_LOWER_BOUND = 4
TARGET_UPPER_BOUND = None

def calculate_binom_cdf(lower, upper, trials, success_rate):
    """Print and return P(lower <= X <= upper) for a binomial variable X.

    Args:
        lower: smallest number of successes to include, or None for no
            lower limit.
        upper: largest number of successes to include, or None for no
            upper limit.
        trials: total number of trials.
        success_rate: probability of success on each trial.

    Returns:
        The probability of the inclusive range, computed as
        cdf(upper) - cdf(lower - 1).
    """
    # Only None means "unbounded". The previous isinstance(..., int) check
    # silently discarded numpy-integer and float limits.
    lower_bound = -math.inf if lower is None else lower
    upper_bound = math.inf if upper is None else upper

    # Subtract the cdf just below the lower limit so the limit itself is included
    cdf_lower = ss.binom.cdf(lower_bound-1, trials, success_rate)
    cdf_upper = ss.binom.cdf(upper_bound, trials, success_rate)
    interval = cdf_upper - cdf_lower

    print("Probability of %.2f <= X <= %.2f successes is %.2f"% (lower_bound, upper_bound, interval))

    return interval

# pmf: both limits are set and equal, so a single exact count is requested.
# The None guard keeps "no limits at all" out of this branch, where
# pmf(None, ...) would raise.
if TARGET_LOWER_BOUND is not None and TARGET_LOWER_BOUND == TARGET_UPPER_BOUND:
    probability = ss.binom.pmf(TARGET_LOWER_BOUND, TOTAL_TRIALS, SUCCESS_RATE)
    print("Probability that we have exactly %d sucesses is %.2f" % (TARGET_LOWER_BOUND, probability))
    
# cdf: a range of counts, possibly open on either side
else:
    p = calculate_binom_cdf(TARGET_LOWER_BOUND, TARGET_UPPER_BOUND, TOTAL_TRIALS, SUCCESS_RATE)

Probability of 4.00 <= X <= inf successes is 0.34


# 5. Binomial mean and standard deviation

![](img/random_variables_p5.png)

In [8]:
TOTAL_TRIALS = 15
SUCCESS_RATE = 0.3

# Mean of a binomial variable: n * p
mu = TOTAL_TRIALS * SUCCESS_RATE

# Variance is n * p * (1 - p); the standard deviation is its square root
variance = mu * (1-SUCCESS_RATE)
sigma = math.sqrt(variance)

print("Mean is: %.1f" % mu)
print("Standard deviation is: %.1f" % sigma)

Mean is: 4.5
Standard deviation is: 1.8


# 6. Geometric random variables

Conditions for geometric random variables:
* variable in question is the number of trials until the first success
* the outcome of each trial can be classified as either success or failure
* each trial is independent
* probability **p** of success on each trial remains constant

![](img/random_variables_p6.png)

In [9]:
# Probability of success on each trial
SUCCESS_RATE = 0.4

# Inclusive limits on X, the number of trials until the first success.
# Equal limits select the exact-trial (pmf) branch further down.
TARGET_LOWER_BOUND = 0
TARGET_UPPER_BOUND = 2

def calculate_geom_cdf(lower, upper, success_rate):
    """Print and return P(lower <= X <= upper) for a geometric variable X.

    Args:
        lower: smallest trial number to include, or None for no lower limit.
        upper: largest trial number to include, or None for no upper limit.
        success_rate: probability of success on each trial.

    Returns:
        The probability of the inclusive range, computed as
        cdf(upper) - cdf(lower - 1).
    """
    # Only None means "unbounded". The previous isinstance(..., int) check
    # silently discarded numpy-integer and float limits.
    lower_bound = -math.inf if lower is None else lower
    upper_bound = math.inf if upper is None else upper

    # Subtract the cdf just below the lower limit so the limit itself is included
    cdf_lower = ss.geom.cdf(lower_bound-1, success_rate)
    cdf_upper = ss.geom.cdf(upper_bound, success_rate)
    interval = cdf_upper - cdf_lower

    print("Probability %.2f <= X <= %.2f until the first try is %.2f"% (lower_bound, upper_bound, interval))

    return interval

# pmf: both limits are set and equal, so a single exact trial is requested.
# The None guard keeps "no limits at all" out of this branch, where
# pmf(None, ...) would raise.
if TARGET_LOWER_BOUND is not None and TARGET_LOWER_BOUND == TARGET_UPPER_BOUND:
    probability = ss.geom.pmf(TARGET_LOWER_BOUND, SUCCESS_RATE)
    print("Probability that first success will be on %dth try is: %.2f" % (TARGET_LOWER_BOUND, probability))

# cdf: a range of trial numbers, possibly open on either side
else:
    p = calculate_geom_cdf(TARGET_LOWER_BOUND, TARGET_UPPER_BOUND, SUCCESS_RATE)

Probability 0.00 <= X <= 2.00 until the first try is 0.64


# 7. More on expected value
![](img/random_variables_p7.png)

In [10]:
probability_number = 1/10
probability_letter = 1/26

# Profit of each outcome: its payout reduced by the same 5 in every case
profitA, profitB, profitC = 10405 - 5, 100 - 5, 0 - 5

# A: product of two number probabilities and one letter probability
pA = probability_number * probability_number * probability_letter
# B: the letter probability with A's share removed (A is a subset of it)
pB = probability_letter - pA
# C: whatever probability remains
pC = 1 - pA - pB

# Expected value is the probability-weighted sum of the profits
expected_value = sum(
    profit * chance
    for profit, chance in zip((profitA, profitB, profitC), (pA, pB, pC))
)

print("Expected value is: %.2f" % expected_value)

Expected value is: 2.81
