
# PARLA

## Problem
Write a function `run_bootstrap` that:
- constructs a confidence interval of the following types:
    - normal confidence interval
    - percentile confidence interval
    - pivotal (central) confidence interval
- checks for statistically significant differences using bootstrapping
- for more info check the function's docstring below

## Action
I implemented 3 different ways of calculating confidence intervals and combined them into one function

## Result
Successfully implemented the function and it passed all tests

## Learning
- I revised relevant Python, NumPy, and SciPy functionality
- I learned (by implementing) several different ways of calculating confidence intervals

## Application
- I can apply relevant Python, NumPy, and SciPy functionality to similar data-related problems
- I can use the implemented function to calculate different types of confidence intervals


In [16]:

from typing import List, Tuple

import numpy as np
import scipy as sp


In [17]:

def generate_bootstrap_metrics(
    data_one: np.ndarray,
    data_two: np.ndarray,
    bootstrap_iter: int,
    bootstrap_agg_func: str
) -> Tuple[np.ndarray, float]:
    """
    Generates metric values using bootstrapping.

    The test statistic is ``agg(data_two) - agg(data_one)``, where ``agg`` is
    either the mean or a quantile. Each group is resampled with replacement
    (via ``np.random.choice``) ``bootstrap_iter`` times, keeping the group's
    original size.

    :param data_one: Metric values in the first group (1-D array-like, non-empty).
    :param data_two: Metric values in the second group (1-D array-like, non-empty).
    :param bootstrap_iter: Number of bootstrap iterations.
    :param bootstrap_agg_func: Aggregation method for the metric.
        Possible values: 'mean' or 'quantile <p>', where <p> is a percentage
        in [0, 100] (e.g. 'quantile 95' for the 0.95 quantile).

    :return:
        - bootstrap_metrics: A NumPy array of test statistics computed on bootstrap samples.
        - pe_metric: A float value of the test statistic computed on the original data.

    :raises ValueError: If ``bootstrap_agg_func`` is not a supported value,
        or if either data group is empty.
    """

    # Resolve the aggregation first, so a bad name fails before any sampling is done.
    # ``quantile_level`` stays None when the aggregation is the mean.
    agg_func_error = "wrong value of 'bootstrap_agg_func' parameter"
    quantile_level = None
    if bootstrap_agg_func != 'mean':
        parts = bootstrap_agg_func.split() if isinstance(bootstrap_agg_func, str) else []
        if len(parts) != 2 or parts[0] != 'quantile':
            raise ValueError(agg_func_error)
        try:
            quantile_level = float(parts[1]) / 100
        except ValueError:
            raise ValueError(agg_func_error) from None
        # also rejects NaN, for which both comparisons are False
        if not 0 <= quantile_level <= 1:
            raise ValueError(agg_func_error)

    # Accept any array-like (e.g. plain lists), not only ndarrays
    data_one = np.asarray(data_one)
    data_two = np.asarray(data_two)
    if data_one.size == 0 or data_two.size == 0:
        raise ValueError("'data_one' and 'data_two' must be non-empty")

    # Generate a matrix of random samples
    # Each column is a random sample from the original data (data_one or data_two)
    # There will be bootstrap_iter columns in total
    bootstrap_data_one = np.random.choice(data_one, size=(len(data_one), bootstrap_iter))
    bootstrap_data_two = np.random.choice(data_two, size=(len(data_two), bootstrap_iter))

    if quantile_level is None:
        # difference of means, per bootstrap sample and on the original data
        bootstrap_metrics = (
            bootstrap_data_two.mean(axis=0) -
            bootstrap_data_one.mean(axis=0)
        )
        pe_metric = data_two.mean() - data_one.mean()
    else:
        # difference of quantiles, per bootstrap sample and on the original data
        bootstrap_metrics = (
            np.quantile(bootstrap_data_two, quantile_level, axis=0) -
            np.quantile(bootstrap_data_one, quantile_level, axis=0)
        )
        pe_metric = (
            np.quantile(data_two, quantile_level) -
            np.quantile(data_one, quantile_level)
        )

    return bootstrap_metrics, float(pe_metric)


In [18]:

# Smoke test for generate_bootstrap_metrics()
data_one = np.array([1, 3])
data_two = np.array([5, 7])
bootstrap_iter = 10
bootstrap_agg_func = 'mean'
bootstrap_metrics, pe_metric = generate_bootstrap_metrics(
    data_one,
    data_two,
    bootstrap_iter,
    bootstrap_agg_func,
)

# The bootstrapped values are random, so only their count is checked.
# The point estimate is deterministic: mean([5, 7]) - mean([1, 3]) == 4.
expected_count = 10
has_expected_count = len(bootstrap_metrics) == expected_count
has_expected_estimate = pe_metric == 4.0
if has_expected_count and has_expected_estimate:
    print('test_01: passed')
else:
    print('test_01: failed')


test_01: passed


In [19]:

def run_bootstrap(
    bootstrap_metrics: np.ndarray,
    pe_metric: float,
    alpha: float,
    bootstrap_ci_type: str
) -> Tuple[List[float], float]:
    """
    Constructs a confidence interval and checks for statistically significant differences using bootstrapping.

    :param bootstrap_metrics: Test statistic values computed from bootstrap samples
        (non-empty array-like).
    :param pe_metric: The test statistic value computed from the original data.
    :param alpha: Significance level, strictly between 0 and 1.
    :param bootstrap_ci_type: Method for constructing the confidence interval.
        Possible values: ['normal', 'percentile', 'pivotal'].

    :return:
        - ci: A list of two floats representing the confidence interval bounds.
        - pvalue: A float value of 0 if a statistically significant difference is found, otherwise 1.
            Note: Computing a true p-value for arbitrary bootstrap CI methods is non-trivial.
            Here, we use boundary values of 0 and 1 as a simplified decision rule.

    :raises ValueError: If ``alpha`` is not in (0, 1), ``bootstrap_metrics`` is empty,
        or ``bootstrap_ci_type`` is not a supported value.
    """

    # Import the submodule explicitly: a bare ``import scipy`` is not guaranteed
    # to expose ``scipy.stats`` on older SciPy releases.
    from scipy import stats

    # An out-of-range alpha would give NaN bounds in the normal branch, and NaN
    # comparisons are False, so the result would wrongly read as "significant".
    if not 0 < alpha < 1:
        raise ValueError("'alpha' must be in the open interval (0, 1)")

    # Accept any array-like (e.g. plain lists), not only ndarrays
    bootstrap_metrics = np.asarray(bootstrap_metrics, dtype=float)
    if bootstrap_metrics.size == 0:
        raise ValueError("'bootstrap_metrics' must be non-empty")

    # calculate confidence interval using normal approximation:
    # point estimate +/- z-quantile * standard deviation of the bootstrap statistics
    if bootstrap_ci_type == 'normal':
        half_width = stats.norm.ppf(1 - alpha / 2) * bootstrap_metrics.std()
        lower = pe_metric - half_width
        upper = pe_metric + half_width

    # calculate confidence interval using actual bootstrap distribution of the metric
    elif bootstrap_ci_type == 'percentile':
        lower, upper = np.quantile(bootstrap_metrics, [alpha / 2, 1 - alpha / 2])

    # calculate pivotal (central) confidence interval:
    # the bootstrap quantiles are reflected around the point estimate
    elif bootstrap_ci_type == 'pivotal':
        q_low, q_high = np.quantile(bootstrap_metrics, [alpha / 2, 1 - alpha / 2])
        lower = 2 * pe_metric - q_high
        upper = 2 * pe_metric - q_low

    else:
        raise ValueError("wrong value of 'bootstrap_ci_type' parameter")

    # plain Python floats, matching the annotated return type
    ci = [float(lower), float(upper)]

    # simplified decision rule: no significant difference if the interval covers zero
    pvalue = 1.0 if lower <= 0 <= upper else 0.0

    return ci, pvalue


In [20]:

# Check run_bootstrap() against known results
# test case 01: a deterministic stand-in for the bootstrap distribution
bootstrap_metrics = np.arange(-90, 910)
pe_metric = 600.
alpha = 0.05
bootstrap_ci_types = ['normal', 'percentile', 'pivotal']

# expected [confidence interval, pvalue] for every CI type
results = {
    'normal': [[34., 1166.], 0.0],
    'percentile': [[-65., 884.], 1.0],
    'pivotal': [[316., 1265.], 0.0]
}

for bootstrap_ci_type in bootstrap_ci_types:
    expected_ci, expected_pvalue = results[bootstrap_ci_type]

    # compute the interval and decision, rounded to whole numbers for comparison
    ci, pvalue = run_bootstrap(bootstrap_metrics, pe_metric, alpha, bootstrap_ci_type)
    ci = np.round(ci)
    pvalue = np.round(pvalue)

    ci_matches = np.allclose(ci, np.round(expected_ci))
    pvalue_matches = pvalue == np.round(expected_pvalue)

    # report the outcome for this CI type
    if ci_matches and pvalue_matches:
        print(f'test_{bootstrap_ci_type}: passed')
    else:
        print(f'test_{bootstrap_ci_type}: failed')


test_normal: passed
test_percentile: passed
test_pivotal: passed
