[Link to videos and exercises](https://www.khanacademy.org/math/statistics-probability/random-variables-stats-library)

# 1. Discrete random variables

$\sigma_X = \sqrt{\sum{(x_i - \mu_X)^2 p_i}}$

where:

$\sigma_X$ is the standard deviation of the discrete random variable $X$

$x_i$ is each specific outcome

$\mu_X$ is the mean of X

$p_i$ is the probability of each specific outcome

![](img/random_variables_p1.png)

In [1]:
import math

# Discrete distribution under study: the outcome at each position of VALUES
# occurs with the probability at the same position of PROBALITIES, and MU is
# the mean of that distribution.
MU = 100
VALUES = [200, -99800]
PROBALITIES = [0.999, 0.001]

# Squared deviation of every outcome from the mean, weighted by its probability.
weighted_squares = [
    (outcome - MU) ** 2 * weight
    for outcome, weight in zip(VALUES, PROBALITIES)
]

# The variance is the total of the weighted squared deviations.
variance = 0
for term in weighted_squares:
    variance += term

# The standard deviation is the square root of the variance.
sigma = math.sqrt(variance)

print("Standard deviation is %.2f" % sigma)

Standard deviation is 3160.70


# 2. Continuous random variables

![](img/random_variables_p2.png)

In [2]:
import math
import scipy.stats as ss

# Mean and standard deviation of the normal distribution.
MU = 1497
SIGMA = 322

# Interval whose probability is wanted.
LOWER_BOUND = 1497
# use math.inf if upper limit is unlimited
UPPER_BOUND = 1819

# P(LOWER_BOUND < X < UPPER_BOUND) is the CDF at the upper bound minus the
# CDF at the lower bound.
distribution = ss.norm(loc=MU, scale=SIGMA)
cdf_lower = distribution.cdf(LOWER_BOUND)
cdf_upper = distribution.cdf(UPPER_BOUND)
interval_probability = cdf_upper - cdf_lower

print("Probability of %.2f < X < %.2f is %.2f"
      % (LOWER_BOUND, UPPER_BOUND, interval_probability))

Probability of 1497.00 < X < 1819.00 is 0.34


# 3. Combining normal random variables

![](img/random_variables_p3.png)

In [3]:
import math
import numpy
import scipy.stats as ss

# Means and standard deviations of the independent normal variables to combine.
MUS = [370, 170]
SIGMAS = [24, 7]
# Interval of the combined variable whose probability is wanted.
LOWER_BOUND = -math.inf
UPPER_BOUND = 575
# True for difference, False for sum
DIFF_FLAG = False


def calculate_cdf(lower_bound, upper_bound, mu, sigma):
    """Return P(lower_bound < X < upper_bound) for X ~ Normal(mu, sigma).

    The probability is the normal CDF at ``upper_bound`` minus the CDF at
    ``lower_bound``; pass ``-math.inf`` / ``math.inf`` for an open side.
    """
    distribution = ss.norm(loc=mu, scale=sigma)
    return distribution.cdf(upper_bound) - distribution.cdf(lower_bound)


def combine_normal_parameters(mus, sigmas, diff=False):
    """Return ``(mu, sigma)`` of a sum or difference of independent normals.

    Args:
        mus: means of the individual variables.
        sigmas: standard deviations of the individual variables.
        diff: when true, describe ``X0 - X1`` (first variable minus second);
            otherwise describe the sum of all variables.

    Variances add for both sums and differences, so ``sigma`` is the square
    root of the summed variances in either case.
    """
    sigma = math.sqrt(sum(s ** 2 for s in sigmas))
    if diff:
        # The mean of X0 - X1 is signed: taking abs() here would silently
        # describe X1 - X0 whenever the first mean is the smaller one.
        mu = mus[0] - mus[1]
    else:
        mu = numpy.sum(mus)
    return mu, sigma


mu, sigma = combine_normal_parameters(MUS, SIGMAS, diff=DIFF_FLAG)

p = calculate_cdf(LOWER_BOUND, UPPER_BOUND, mu, sigma)
print("Probability is %.2f" % p)

Probability is 0.92


# 4. Binomial random variables

![](img/random_p2.png)

In [4]:
# This snippet covers both the cumulative probability (as in the example)
# and the probability of one specific value of a binomial random variable.

import scipy.stats as ss

TOTAL_TRIALS = 3
SUCCESS_RATE = 0.9

# Binomial distribution of the number of successes in TOTAL_TRIALS trials.
successes = ss.binom(TOTAL_TRIALS, SUCCESS_RATE)

# example 1: exact value (binompdf)

TARGET_SUCCESSES_NUMBER = 2

probability = successes.pmf(TARGET_SUCCESSES_NUMBER)
print("Probability that we have exactly %d sucesses is %.2f" % (TARGET_SUCCESSES_NUMBER, probability))

# example 2: range (binomcdf)

TARGET_LESS_OR_EQUAL_NUMBER = 1

cumulative_probability = successes.cdf(TARGET_LESS_OR_EQUAL_NUMBER)
complement_probability = 1 - cumulative_probability
print("Probability that we have <= %d sucesses is %.2f" % (TARGET_LESS_OR_EQUAL_NUMBER, cumulative_probability))
# Complement of the cumulative probability: for a discrete variable
# P(X >= 2) = P(X > 1) = 1 - P(X <= 1).
print("Probability that we have > %d sucesses is %.2f" % (TARGET_LESS_OR_EQUAL_NUMBER, complement_probability))

Probability that we have exactly 2 sucesses is 0.24
Probability that we have <= 1 sucesses is 0.03
Probability that we have > 1 sucesses is 0.97


# 5. Binomial mean and standard deviation

![](img/random_p4.png)

In [5]:
# Mean and standard deviation of a binomial random variable:
# mean = n * p, variance = n * p * (1 - p), std = sqrt(variance).

import math

sample = 15
success_rate = 0.3
failure_rate = 1 - success_rate

mean = sample * success_rate
# n * p * (1 - p), reusing the mean n * p computed above.
variance = mean * failure_rate
std = math.sqrt(variance)

# Report the mean first, then the standard deviation, one value per line.
for statistic in (mean, std):
    print(statistic)

4.5
1.7748239349298849


# 6. Geometric random variables

![](img/random_p3.png)

In [6]:
# Geometric random variable.
# NOTE: scipy's geom describes the trial on which the first success occurs,
# so the numbers below are trial counts even though the printed messages
# say "sucesses".

import scipy.stats as ss

SUCCESS_RATE = 0.6
TARGET_NUMBER = 2

# Distribution of the trial number of the first success.
first_success = ss.geom(SUCCESS_RATE)

# Probability that the first success happens exactly on trial TARGET_NUMBER.
probability = first_success.pmf(TARGET_NUMBER)
print("Probability that we have exactly %d sucesses is %.2f" % (TARGET_NUMBER, probability))

TARGET_LESS_OR_EQUAL_NUMBER = 2

# Probability that the first success happens on or before the given trial.
cumulative_probability = first_success.cdf(TARGET_LESS_OR_EQUAL_NUMBER)
complement_probability = 1 - cumulative_probability
print("Probability that we have <= %d sucesses is %.2f" % (TARGET_LESS_OR_EQUAL_NUMBER, cumulative_probability))
# For a discrete variable P(X < 3) is the same as P(X <= 2), printed above;
# the line below prints its complement, P(X > 2).
print("Probability that we have > %d sucesses is %.2f" % (TARGET_LESS_OR_EQUAL_NUMBER, complement_probability))


Probability that we have exactly 2 sucesses is 0.24
Probability that we have <= 2 sucesses is 0.84
Probability that we have > 2 sucesses is 0.16
