# Stats checkpoint answers

In [1]:
def median(xs):
    """
    Return the median of a standard Python list of numbers.

    The result depends on how many values the list holds:

    - no elements: None is returned
    - exactly one element: that element is returned
    - an odd number of elements: the middle value of the sorted list
    - an even number of elements: the mean of the two middle values of
      the sorted list

    The list is sorted IN PLACE, so the caller's list may be reordered.
    No copy is made.

    Parameters
    ----------
    xs: A standard Python list of float or integer values.

    Returns
    -------
    The median as described above, or None for an empty list.
    """
    count = len(xs)

    # Guard clauses for the two cases that need no sorting at all.
    if count == 0:
        return None
    if count == 1:
        return xs[0]

    xs.sort()
    upper_idx, has_single_middle = divmod(count, 2)
    if has_single_middle:
        return xs[upper_idx]

    # Even count: average the two values that straddle the midpoint.
    lower_idx = upper_idx - 1
    return (xs[upper_idx] + xs[lower_idx]) / 2

In [2]:
import numpy as np

def numpy_mean(xs):
    """
    Compute the arithmetic mean of xs by hand (sum divided by count).

    xs is treated as an entire population.

    Parameters
    ----------
    xs: A NumPy array of values.

    Returns
    -------
    The mean of xs.
    """
    total = xs.sum()
    # The count is the length of the first axis.
    count = xs.shape[0]
    return total / count

In [3]:
import numpy as np

def numpy_variance(xs):
    """
    Compute the population variance of xs by hand.

    The variance is the mean of the squared deviations from the mean.
    xs is treated as an entire population, so the divisor is n (not n - 1).

    Parameters
    ----------
    xs: A NumPy array of values.

    Returns
    -------
    The variance of xs.
    """
    count = xs.shape[0]
    center = xs.sum() / count
    squared_deviations = (xs - center) ** 2
    return np.sum(squared_deviations) / count

In [4]:
def numpy_std_dev(xs):
    """
    Compute the population standard deviation of xs by hand.

    The standard deviation is the square root of the population variance
    (divisor n, not n - 1), since xs is treated as an entire population.

    Parameters
    ----------
    xs: A NumPy array of values.

    Returns
    -------
    The standard deviation of xs.
    """
    count = xs.shape[0]
    center = xs.sum() / count
    squared_deviations = (xs - center) ** 2
    population_variance = np.sum(squared_deviations) / count
    return np.sqrt(population_variance)

In [5]:
import numpy as np

def short_numpy_mean(xs):
    """
    Compute the mean of xs by delegating to NumPy's built-in np.mean.

    xs is treated as an entire population.

    Parameters
    ----------
    xs: A NumPy array of values.

    Returns
    -------
    The mean of xs.
    """
    average = np.mean(xs)
    return average

In [6]:
import numpy as np

def short_numpy_var(xs):
    """
    Compute the population variance of xs using NumPy's built-in np.var.

    xs is treated as an entire population, so zero delta degrees of
    freedom are used (the divisor is n).

    Parameters
    ----------
    xs: A NumPy array of values.

    Returns
    -------
    The variance of xs.
    """
    # ddof=0 is NumPy's default; spelled out to make "population" explicit.
    population_variance = np.var(xs, ddof=0)
    return population_variance

In [7]:
import numpy as np

def short_numpy_std_dev(xs):
    """
    Compute the population standard deviation of xs using NumPy's
    built-in np.std.

    xs is treated as an entire population, so zero delta degrees of
    freedom are used (the divisor is n).

    Parameters
    ----------
    xs: A NumPy array of values.

    Returns
    -------
    The standard deviation of xs.
    """
    # ddof=0 is NumPy's default; spelled out to make "population" explicit.
    population_std = np.std(xs, ddof=0)
    return population_std

In [8]:
def bernoulli_pmf(k, p):
    """
    Calculates and returns the Bernoulli PMF for a single trial.

    Parameters
    ----------
    k: The outcome of the trial. 1 is success and 0 is failure.
    p: Probability of a successful trial.

    Returns
    -------
    If k == 1, the probability of success (p). If k == 0, the probability
    of failure (1 - p). For any other k, 0.0, because a Bernoulli
    variable can only take the values 0 and 1.
    """
    if k == 1:
        return p
    if k == 0:
        return 1 - p
    # k lies outside the support {0, 1}: such an outcome is impossible.
    return 0.0

def bernoulli_var(p):
    """
    Calculates and returns the variance of a Bernoulli distribution,
    which is p * (1 - p).

    Parameters
    ----------
    p: Probability of a successful trial.

    Returns
    -------
    Variance of the Bernoulli distribution.
    """
    failure_probability = 1 - p
    return p * failure_probability

In [9]:
def fact(x):
    """
    This function calculates and returns the value of x!

    The product is accumulated iteratively by multiplying the integers
    x, x - 1, ..., 1 together. When x is 0 or negative the loop body never
    runs, so the function returns 1.

    Parameters
    ----------
    x: Integer to find the factorial of. It is passed directly to range().

    Returns
    -------
    Value of x! (1 when x <= 0).
    """
    result = 1
    # Count down from x to 1; the stop value 0 is exclusive.
    for i in range(x, 0, -1):
        result *= i
    return result
### DO NOT MODIFY THE FUNCTION ABOVE ###

def binomial_coeff(n, k):
    """
    This function calculates and returns the value of the binomial
    coefficient for n and k. Think "n choose k":

        n! / (k! * (n - k)!)

    The result is computed with exact integer arithmetic, so it stays
    precise for large n and does not overflow the float range.

    Parameters
    ----------
    n: Total number of objects in set.
    k: Number of objects in subset.

    Returns
    -------
    Value of binomial coefficient for variables n and k, as an integer.
    Returns 0 when k is negative or greater than n, since no such subset
    can be chosen.
    """
    # There is no way to pick a subset of negative size or one larger
    # than the set itself.
    if k < 0 or k > n:
        return 0
    # k! * (n - k)! always divides n! exactly, so floor division yields the
    # exact coefficient without the rounding or overflow of float division.
    return fact(n) // (fact(k) * fact(n - k))

def binomial_pmf(k, n, p):
    """
    This function calculates and returns the binomial probability mass:
    the probability of exactly k successes in n independent trials that
    each succeed with probability p.

    Parameters
    ----------
    k: number of successful trials
    n: number of trials
    p: probability of a successful trial

    Returns
    -------
    Probability of observing exactly k successes in n trials.
    """
    # Number of distinct ways to place the k successes among n trials.
    arrangements = binomial_coeff(n, k)
    success_term = p ** k
    failure_term = (1 - p) ** (n - k)
    return arrangements * success_term * failure_term

$$ P(X = 3) = \binom{1000}{3} \times 0.005^3 \times (1 - 0.005)^{1000 - 3} \approx 0.1403 $$

In [10]:
### KEEP THE IMPORTS BELOW ###
from math import factorial as f
from math import exp
### KEEP THE IMPORTS ABOVE ###

def poisson_pmf(k, lam):
    """
    Calculates the probability of k occurrences in a given unit of
    time, given that the expected number of occurrences in that
    unit of time is lam. The Poisson probability mass function is
    used to calculate this:

        lam**k * e**(-lam) / k!

    For this challenge, you have access to the function exp()
    which raises Euler's number to the power given in the
    argument to the exp() function.

    Parameters
    ----------
    k: The number of occurrences we seek the probability for.
    lam: The lambda parameter of the Poisson.

    Returns
    -------
    Probability that you will see k occurrences in a unit of time
    given lam
    """
    rate_power = lam ** k
    decay = exp(-lam)
    return rate_power * decay / f(k)