In [102]:
import numpy as np
from scipy.stats import iqr
from scipy.stats import norm
from scipy.stats import mode
import math

In [66]:
def percentile_rank(arr, v):
    """
    Returns the percentile rank of a value in a given array
    """
    return sum(arr <= v) / len(arr) * 100

def outliers_IRR(arr):
    """
    Returns an array with outliers from the input array
    according to the interquartile range rule
    """
    #assumes numpy (np) and iqr from scipy.stats loaded
    maximum = np.percentile(arr,75) + 1.5 * iqr(arr) 
    minimum = np.percentile(arr,25) - 1.5 * iqr(arr)
    
    outliers = arr[(arr < minimum) | (arr > maximum)]
    return outliers

def SS(arr):
    """
    Returns sum of squares of array
    """
    squares = (arr - np.mean(arr))**2
    return sum(squares)

def z_score(X, u, std):
    return (X - u) / std

In [126]:
arr = np.array([13,25,7,15,7,11,24,1,25,9,12,17,16,8,12,8])

In [127]:
np.percentile(arr,75)

16.25

In [112]:
2.4

100.0

In [90]:
outliers_IRR(arr) 

array([], dtype=int64)

In [80]:
np.std(arr,ddof=1)

3.1622776601683795

In [125]:
np.std(arr,ddof=1)

4.69041575982343

In [22]:
#Probability of mean of sample falling within given boudaries
norm.cdf(167,loc=165,scale=(33/9)) - norm.cdf(165,loc=165,scale=(33/9))

0.20727953271553645

In [155]:
norm.ppf(0.9,loc=215,scale=(64/np.sqrt(196)))

220.8585214424896

In [27]:
#Finds q for which P(X < q)
norm.ppf((0.05/2),loc=0,scale=(1/8))

-0.2449954980675068

Sampling Distribution of the Sample Proportion

In [138]:
def sample_proportion_STD(n, pi):
    return np.sqrt(pi*(1-pi)/n)

def isnormal(n, pi):
    if n * pi >= 10 and n * (1 - pi) >= 10:
        return True
    else:
        return False

In [140]:
pi = 0.5
n  = 160
# Calculate P(^p > 0.31)
p = 0.52

print(isnormal(n, pi))

1 - norm.cdf(p,loc=pi,scale=sample_proportion_STD(n, pi))

True


0.3064408117066181

5.4.

In [43]:
pi = 0.28
n  = 220
# Calculate P(^p > 0.31)
p = 0.31

print(isnormal(n, pi))

1 - norm.cdf(p,loc=pi,scale=sample_proportion_STD(n, pi))

True


0.16083517063280395

5.5.

In [143]:
pi = 0.62
n  = 120
# Calculate P(0.59 <= ^p <= 0.65)
pmin = 0.59
pmax = 0.65

if isnormal(n, pi):
    u = pi
    print(norm.cdf(pmax,loc=u,scale=sample_proportion_STD(n, pi))\
    -norm.cdf(pmin,loc=u,scale=sample_proportion_STD(n, pi)))
else:
    print('Conditions for normality not satisfied')

0.5016292230715562


5.6.

In [50]:
pi = 0.12
n  = 140
# Calculate P(^p < 0.13)
p = 0.13

if isnormal(n, pi):
    u = pi
    print(norm.cdf(p,loc=u,scale=sample_proportion_STD(n, pi)))
else:
    print('Conditions for normality not satisfied')

0.6421118936315154


6. Parameter Estimation and Confidence Intervals

In [27]:
def confidence_interval_95(n, sigma, X):
    """
    Returns lower and upper bounds of 95% confidence interval
    """
    error = 1.96 * (sigma / np.sqrt(n))
    print ('({:6.3f},{:6.3f})'.format(X-error, X+error))

def critical_value(C):
    """
    Calculates the critical value z*
    """
    return norm.ppf((100+C)/200,loc=0,scale=1)

def margin_of_error(C, sigma, n):
    """
    Returns margin of error ME
    """
    return critical_value(C) * (sigma/np.sqrt(n))

def confidence_interval(n, sigma, X, C):
    """
    Returns lower and upper bounds of a confidence interval
    """
    error = critical_value(C) * (sigma / np.sqrt(n))
    print ('CI = ({:6.3f},{:6.3f})'.format(X-error, X+error))

def minimum_ss(C, sigma, k):
    """
    Calculates the minimum n sample size for the
    margin of error of the C% confidence interval
    for the population mean, u, to be no larger than k 
    """
    return (critical_value(C) * sigma / k ) ** 2

In [137]:
C = 95
sigma = 5
X = 11.2
n = 126

print(margin_of_error(C,sigma,n))
confidence_interval(n,sigma,X,C)

k = 1
print(minimum_ss(C,sigma,k))

0.8730373476863309
CI = (10.327,12.073)
96.03647051735311


6.1.

In [8]:
n = 134
sigma = 2
X = 11.9
confidence_interval_95(n, sigma, X)

(11.561,12.239)


6.1.

In [154]:
C = 94
sigma = 0.0514
X = 2.463
n = 1000

print(margin_of_error(C,sigma,n))
confidence_interval(n,sigma,X,C)

k = 1
print(minimum_ss(C,sigma,k))

0.0030570620877683093
CI = ( 2.460, 2.466)
0.009345628608470333


6.2.

In [33]:
sigma = 1.4 #Days
n = 64      #Spaceflights
X = 2.28    #Mean travel time (days)
C = 94      #Confidence level (%)

print(margin_of_error(C, sigma, n))
confidence_interval(n,sigma,X,C)

k = 0.3
print(minimum_ss(C,sigma,k))

0.32913888142646885
CI = ( 1.951, 2.609)
77.03637565629663


6.4.

In [40]:
n   = 1000 #sample size
std = 0.0528
X   = 2.311
C   = 98

confidence_interval(n, std, X, C)

CI = ( 2.307, 2.315)


In [134]:
def confidence_interval_prop(n, p, C):
    """
    Returns lower and upper bounds of a confidence interval
    for the population proportion pi
    """
    error = critical_value(C) * sample_proportion_STD(n, p)
    print ('CI = ({:6.3f},{:6.3f})'.format(p-error, p+error))

def margin_of_error_prop(C, p, n):
    """
    Returns margin of error ME
    """
    return critical_value(C) * sample_proportion_STD(n, p)

def minimum_ss_prop(C, k):
    """
    Calculates the minimum n sample size for the
    margin of error of the C% confidence interval
    for the population proportion, pi, to be no larger than k 
    """
    return math.ceil(0.25 * (critical_value(C) / k)**2 )

In [142]:
n = 1934
p = 812/n
C = 93
k = 0.02

if isnormal(n, p):
    confidence_interval_prop(n, p, C)
    print(margin_of_error_prop(C,p,n))
else:
    print('Conditions for normality not satisfied')

print(minimum_ss_prop(C, k))

CI = ( 0.400, 0.440)
0.020334173351130188
2052


6.1.

In [53]:
n = 400
p = 44/n
C = 97

if isnormal(n, p):
    confidence_interval_prop(n, p, C)
    print(margin_of_error_prop(C,p,n))
else:
    print('Conditions for normality not satisfied')

print(minimum_ss_prop(C, k))

CI = ( 0.076, 0.144)
0.033949952539364885
1309


6.2.

In [55]:
n = 1644
p = 543/n
C = 90

if isnormal(n, p):
    confidence_interval_prop(n, p, C)
    print(margin_of_error_prop(C,p,n))
else:
    print('Conditions for normality not satisfied')

k = 0.01
print(minimum_ss_prop(C, k))

CI = ( 0.311, 0.349)
0.01907954928893612
6764


6.4.

In [57]:
n = 220
p = 44/n
C = 93

if isnormal(n, p):
    confidence_interval_prop(n, p, C)
    print(margin_of_error_prop(C,p,n))
else:
    print('Conditions for normality not satisfied')

k = 0.01
print(minimum_ss_prop(C, k))

CI = ( 0.151, 0.249)
0.04886359706001287
8208


6.5.

In [151]:
C = 90
L = 0.85
U = 0.95

ME = (U - L) / 2
p  = (U + L) / 2

n = (critical_value(C)/ME)**2 * (p * (1-p))
print (n)

97.39956434743499


In [63]:
critical_value(C)

1.6448536269514722