In [9]:
import scipy.stats
import numpy as np
import plotly.express as px
import chart_studio.plotly as py
from plotly.offline import iplot
import toleranceinterval as ti

# Proportion of the population that the tolerance interval is asked to cover.
population=0.95
# Confidence level requested for the prediction and tolerance intervals.
confidence=0.95
# Significance level derived from the confidence level.
alpha=1-confidence
# NOTE(review): the main_* loops below iterate over mca_samples_range with a
# local variable of the same name, so this value looks unused -- confirm.
n_mca_samples=6
# Number of independent repetitions run for each sample size.
n_voxels=5000
# Sample sizes tried in turn by main_prediction and main_tolerance.
mca_samples_range = [6, 10, 30, 100,500,1000]


# Mean estimator

### Unbiased estimator

$M_n = \frac{\sum{X_i}}{n}$

# Variance estimators

### Biased estimator

$S^{2}_n = \frac{\sum{(X_i-M_n)^2}}{n}$

### Unbiased estimator

$S^{2}_{n-1} = \frac{\sum{(X_i-M_n)^2}}{n-1}$

# Standard deviation estimators

### Biased estimator

$S_n = \sqrt{S^2_{n}} = \sqrt{\frac{\sum{(X_i-M_n)^2}}{n}}$

### Biased estimator 2

$S_{n-1} = \sqrt{S^2_{n-1}} = \sqrt{\frac{\sum{(X_i-M_n)^2}}{n-1}}$

### Unbiased estimator

$\hat{S}_{n-1} = c_4(n)^{-1}\sqrt{S^2_{n-1}} = c_4(n)^{-1} \sqrt{\frac{\sum{(X_i-M_n)^2}}{n-1}}$
with
$c_4(n) = \sqrt{\frac{2}{n-1}}\frac{\Gamma(\frac{n}{2})}{\Gamma(\frac{n-1}{2})}$

# Prediction interval

If $X$ is normally distributed, then PI is a two-sided prediction interval with confidence level $1-\alpha$ for one future observation:

$PI = M \pm \sqrt{1 + \frac{1}{n}} \cdot t_{1-\alpha/2,n-1} \cdot S$

# Tolerance interval

If $X$ is normally distributed, then TI is a two-sided tolerance interval that contains a proportion $p$ of the population with confidence level $1-\alpha$:

$$ TI = M \pm k_2 \cdot S $$

with $k_2$ the solution of the integral equation:

$$
\sqrt{\frac{2n}{\pi}} \int_{0}^{\infty} 
\left( 1 - P_Q 
    \left( 
        Q \geq \frac{\chi^2_{1,p}(x^2)}{k^2_2}
    \right) e^{-\frac{x^2}{2S^2}}
\right) dx = 1 - \alpha 
$$



In [10]:
# Estimator for the mean

def mean(x):
    '''
    Sample mean estimator: M_n = sum(X_i)/n
    '''
    sample_mean = np.mean(x)
    return sample_mean

# Estimators for the variance

def var_biased(x):
    '''
    Biased variance estimator (divides by the sample size):
    S^2_{n} = sum((X_i - M_n)^2)/n
    '''
    # Deviations of every observation from the sample mean.
    centered = x - mean(x)
    sum_of_squares = np.sum(np.square(centered))
    return sum_of_squares/x.size

def var_unbiased(x):
    '''
    Unbiased variance estimator (divides by the sample size minus one):
    S^2_{n-1} = sum((X_i - M_n)^2)/(n-1)
    '''
    # Deviations of every observation from the sample mean.
    centered = x - mean(x)
    degrees_of_freedom = x.size - 1
    sum_of_squares = np.sum(np.square(centered))
    return sum_of_squares/degrees_of_freedom

# Estimators for the standard deviation

def std_biased_1(x):
    '''
    Standard deviation estimator built on the biased variance:
    S_n = sqrt( S^2_n )
    '''
    variance = var_biased(x)
    return np.sqrt(variance)

def std_biased_2(x):
    '''
    Standard deviation estimator built on the unbiased variance:
    S_{n-1} = sqrt( S^2_{n-1} )
    '''
    variance = var_unbiased(x)
    return np.sqrt(variance)

def c4(n):
    '''
    Correction factor applied to sqrt(S^2_{n-1}) by std_unbiased:
    c4(n) = sqrt(2/(n-1)) * gamma(n/2) / gamma((n-1)/2)
    '''
    log_gamma = scipy.special.loggamma
    # The gamma ratio is formed in log space and exponentiated afterwards.
    log_ratio = log_gamma(n/2)-log_gamma((n-1)/2)
    scale = np.sqrt(2/(n-1))
    return scale * np.exp(log_ratio)

def std_unbiased(x):
    '''
    Standard deviation estimator corrected by c4:
    std = sqrt( S^2_{n-1} ) / c4(n), with n the number of elements of x
    '''
    uncorrected = std_biased_2(x)
    return uncorrected/c4(x.size)

# Function to draw samples normally distributed

def draw_samples(n):
    '''
    Draw n values from the standard normal distribution using
    numpy's global random state.
    '''
    return np.random.normal(size=n)

# Function for testing prediction interval

def is_in_prediction_interval(x, mean, std, confidence):
    '''
    Check which values of x fall inside the two-sided prediction interval
    (mean - t * c * std, mean + t * c * std)
    with
    alpha = 1 - confidence
    c = sqrt(1 + 1/n)
    t = t_{1-alpha/2, n-1}

    x: array of observations to test; n is taken as x.size
    mean: centre of the interval
    std: standard deviation estimate used to scale the interval
    confidence: confidence level of the interval (e.g. 0.95)

    Returns a boolean array with one entry per element of x.
    '''
    n = x.size
    # Derive alpha from the argument rather than from a module-level variable,
    # so the requested confidence level is the one actually applied.
    alpha = 1 - confidence
    coef_t = scipy.stats.t.ppf(1-alpha/2, df=n-1)
    half_width = coef_t * np.sqrt(1 + 1/n) * std
    success = np.logical_and(mean - half_width <= x, x <= mean + half_width)
    return success

def get_success_prediction(x, mean, std, confidence):
    '''
    Count how many values of x fall inside / outside the prediction interval.
    Returns (number of values, number inside, number outside).
    '''
    n_values = x.size
    inside = is_in_prediction_interval(x, mean, std, confidence)
    n_inside = np.count_nonzero(inside)
    return n_values, n_inside, n_values - n_inside

def test_prediction(n, std, confidence):
    '''
    Draw n samples, estimate their mean and (with the std callable) their
    standard deviation, then draw n fresh samples and return how many of
    them fall inside the resulting prediction interval.
    '''
    reference = draw_samples(n)
    centre = mean(reference)
    spread = std(reference)
    # Second, independent draw that plays the role of future observations.
    future = draw_samples(n)
    counts = get_success_prediction(future, centre, spread, confidence)
    return counts[1]

def run_prediction(std, n_mca_samples, n_voxels, confidence):
    '''
    Repeat test_prediction n_voxels times with samples of size n_mca_samples
    and return the percentage of drawn values that fell inside their
    prediction interval. numpy's global seed is reset to 0 on every call.
    '''
    np.random.seed(0)
    hits_per_voxel = []
    for _ in range(n_voxels):
        hits_per_voxel.append(test_prediction(n_mca_samples, std, confidence))
    n_drawn = n_voxels*n_mca_samples
    return np.sum(hits_per_voxel)/n_drawn * 100


# Functions for testing tolerance interval


def compute_k2(n,p,c):
    '''
    Two-sided normal tolerance factor k_2.

    n: sample size
    p: proportion of the population to cover
    c: confidence level

    The arguments are forwarded to ti.twoside.normal_factor; the module-level
    population/confidence values are no longer used in place of p and c.
    '''
    return ti.twoside.normal_factor(n, p, c)
    

def is_in_tolerance_interval(x, mean, std, population, confidence):
    '''
    Flag the values of x lying inside the tolerance interval
    (mean - k_2 * std, mean + k_2 * std )
    where k_2 comes from compute_k2 for x.size, population and confidence.
    Returns a boolean array with one entry per element of x.
    '''
    k2 = compute_k2(x.size, population, confidence)
    half_width = k2 * std
    above_lower = mean - half_width <= x
    below_upper = x <= mean + half_width
    return np.logical_and(above_lower, below_upper)

def get_success_tolerance(x, mean, std, population, confidence):
    '''
    Count how many values of x fall inside / outside the tolerance interval.
    Returns (number of values, number inside, number outside).
    '''
    n_values = x.size
    inside = is_in_tolerance_interval(x, mean, std, population, confidence)
    n_inside = np.count_nonzero(inside)
    return n_values, n_inside, n_values - n_inside

def test_tolerance(n, std, population, confidence):
    '''
    Draw n samples, estimate their mean and (with the std callable) their
    standard deviation, then draw n fresh samples and return how many of
    them fall inside the resulting tolerance interval.
    '''
    reference = draw_samples(n)
    centre = mean(reference)
    spread = std(reference)
    # Second, independent draw checked against the interval.
    fresh = draw_samples(n)
    counts = get_success_tolerance(fresh, centre, spread, population, confidence)
    return counts[1]

def run_tolerance(std, n_mca_samples, n_voxels, population, confidence):
    '''
    Repeat test_tolerance n_voxels times with samples of size n_mca_samples
    and return the percentage of drawn values that fell inside their
    tolerance interval. numpy's global seed is reset to 0 on every call.
    '''
    np.random.seed(0)
    hits_per_voxel = []
    for _ in range(n_voxels):
        hits_per_voxel.append(test_tolerance(n_mca_samples, std, population, confidence))
    n_drawn = n_voxels*n_mca_samples
    return np.sum(hits_per_voxel)/n_drawn * 100

# Main function

def main_prediction(n_voxels, mca_samples_range, std, confidence):
    '''
    Print the prediction-interval success ratio obtained with the std
    estimator for every sample size in mca_samples_range.
    '''
    header = f'Test prediction interval with {std.__name__} for confidence ({confidence})'
    print(header)
    for n_mca_samples in mca_samples_range:
        ratio = run_prediction(std=std, n_mca_samples=n_mca_samples,
                               n_voxels=n_voxels, confidence=confidence)
        line = f'n_voxels: {n_voxels}, n_mca_samples: {n_mca_samples:5}, success ratio: {ratio:.2f}%'
        print(line)
        
def main_tolerance(n_voxels, mca_samples_range, std, population, confidence):
    '''
    Print the tolerance-interval success ratio obtained with the std
    estimator for every sample size in mca_samples_range.
    '''
    header = f'Test tolerance interval with {std.__name__} for confidence ({confidence}) and population ({population})'
    print(header)
    for n_mca_samples in mca_samples_range:
        ratio = run_tolerance(std=std, n_mca_samples=n_mca_samples, n_voxels=n_voxels,
                              population=population, confidence=confidence)
        line = f'n_voxels: {n_voxels}, n_mca_samples: {n_mca_samples:5}, success ratio: {ratio:.2f}%'
        print(line)

In [11]:
# Prediction- and tolerance-interval success ratios using std_biased_1,
# the square root of the biased (divide-by-n) variance.
main_prediction(n_voxels, mca_samples_range, std_biased_1, confidence)
main_tolerance(n_voxels, mca_samples_range, std_biased_1, population, confidence)

Test prediction interval with std_biased_1 for confidence (0.95)
n_voxels: 5000, n_mca_samples:     6, success ratio: 93.62%
n_voxels: 5000, n_mca_samples:    10, success ratio: 94.01%
n_voxels: 5000, n_mca_samples:    30, success ratio: 94.63%
n_voxels: 5000, n_mca_samples:   100, success ratio: 94.91%
n_voxels: 5000, n_mca_samples:   500, success ratio: 94.99%
n_voxels: 5000, n_mca_samples:  1000, success ratio: 94.99%
Test tolerance interval with std_biased_1 for confidence (0.95) and population (0.95)
n_voxels: 5000, n_mca_samples:     6, success ratio: 98.80%
n_voxels: 5000, n_mca_samples:    10, success ratio: 98.69%
n_voxels: 5000, n_mca_samples:    30, success ratio: 98.05%
n_voxels: 5000, n_mca_samples:   100, success ratio: 97.07%
n_voxels: 5000, n_mca_samples:   500, success ratio: 96.09%
n_voxels: 5000, n_mca_samples:  1000, success ratio: 95.78%


In [12]:
# Prediction- and tolerance-interval success ratios using std_biased_2,
# the square root of the unbiased (divide-by-(n-1)) variance.
main_prediction(n_voxels, mca_samples_range, std_biased_2, confidence)
main_tolerance(n_voxels, mca_samples_range, std_biased_2, population, confidence)

Test prediction interval with std_biased_2 for confidence (0.95)
n_voxels: 5000, n_mca_samples:     6, success ratio: 95.12%
n_voxels: 5000, n_mca_samples:    10, success ratio: 95.01%
n_voxels: 5000, n_mca_samples:    30, success ratio: 95.00%
n_voxels: 5000, n_mca_samples:   100, success ratio: 95.02%
n_voxels: 5000, n_mca_samples:   500, success ratio: 95.01%
n_voxels: 5000, n_mca_samples:  1000, success ratio: 95.01%
Test tolerance interval with std_biased_2 for confidence (0.95) and population (0.95)
n_voxels: 5000, n_mca_samples:     6, success ratio: 99.19%
n_voxels: 5000, n_mca_samples:    10, success ratio: 99.02%
n_voxels: 5000, n_mca_samples:    30, success ratio: 98.21%
n_voxels: 5000, n_mca_samples:   100, success ratio: 97.16%
n_voxels: 5000, n_mca_samples:   500, success ratio: 96.10%
n_voxels: 5000, n_mca_samples:  1000, success ratio: 95.79%


In [13]:
# Prediction- and tolerance-interval success ratios using std_unbiased,
# the c4-corrected standard deviation estimator.
main_prediction(n_voxels, mca_samples_range, std_unbiased, confidence)
main_tolerance(n_voxels, mca_samples_range, std_unbiased, population, confidence)    

Test prediction interval with std_unbiased for confidence (0.95)
n_voxels: 5000, n_mca_samples:     6, success ratio: 95.88%
n_voxels: 5000, n_mca_samples:    10, success ratio: 95.53%
n_voxels: 5000, n_mca_samples:    30, success ratio: 95.17%
n_voxels: 5000, n_mca_samples:   100, success ratio: 95.07%
n_voxels: 5000, n_mca_samples:   500, success ratio: 95.02%
n_voxels: 5000, n_mca_samples:  1000, success ratio: 95.01%
Test tolerance interval with std_unbiased for confidence (0.95) and population (0.95)
n_voxels: 5000, n_mca_samples:     6, success ratio: 99.30%
n_voxels: 5000, n_mca_samples:    10, success ratio: 99.14%
n_voxels: 5000, n_mca_samples:    30, success ratio: 98.29%
n_voxels: 5000, n_mca_samples:   100, success ratio: 97.19%
n_voxels: 5000, n_mca_samples:   500, success ratio: 96.11%
n_voxels: 5000, n_mca_samples:  1000, success ratio: 95.80%
