# Imports

In [1]:
import numpy as np
import numpy.random as npr
import scipy as scp

# Exercise 1

# Exercise 2

## Part A

In [3]:
def waldStat(x, y, ddof=0):
    """Return the two-sample Wald statistic for a difference in means.

    The statistic is ``(mean(x) - mean(y)) / se`` with
    ``se = sqrt(var(x) / n_x + var(y) / n_y)``.

    Parameters
    ----------
    x, y : array_like
        The two samples. Plain Python sequences are accepted as well as
        NumPy arrays.
    ddof : int, optional
        Delta degrees of freedom forwarded to ``np.var``. The default ``0``
        uses the divide-by-n variance; pass ``ddof=1`` for the unbiased
        (divide-by-(n-1)) sample variance.

    Returns
    -------
    float
        The standardized difference between the two sample means.
    """
    # Coerce once so that `.size` is available for list/tuple inputs too.
    x = np.asarray(x)
    y = np.asarray(y)
    std_err = np.sqrt(
        np.var(x, ddof=ddof) / x.size + np.var(y, ddof=ddof) / y.size
    )
    return (np.mean(x) - np.mean(y)) / std_err

def waldP(x_lim):
    """Return the two-sided p-value of a Wald statistic.

    Computes ``P(|Z| > |x_lim|)`` for a standard normal ``Z``.

    Parameters
    ----------
    x_lim : float or array_like
        Observed value(s) of the Wald statistic. The sign is ignored, so a
        negative statistic yields the same p-value as its absolute value.

    Returns
    -------
    float or ndarray
        Two-sided p-value(s) in ``[0, 1]``.
    """
    # Import the submodule explicitly: a bare `import scipy` is not
    # guaranteed to make `scipy.stats` available on every SciPy version.
    from scipy.stats import norm

    # The survival function avoids the cancellation in `1 - cdf(x)`, which
    # rounds to exactly 0 once the statistic is large.
    return 2 * norm.sf(np.abs(x_lim))

# Measurements for the two authors being compared.
twain = np.array([0.225, 0.262, 0.217, 0.240, 0.230, 0.229, 0.235, 0.217])
snodgrass = np.array(
    [0.209, 0.205, 0.196, 0.210, 0.202, 0.207, 0.224, 0.223, 0.220, 0.201]
)

# Wald statistic for the difference in means, followed by the two-sided
# p-value evaluated at that statistic rounded to three decimals.
print(waldStat(twain, snodgrass))
print(waldP(3.945))

3.9446199162650353
7.979997860951826e-05


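The Wald statistic for this test is approximately 3.945. The p-value is the probability, under the null hypothesis, of observing a statistic at least as extreme as this one, i.e. the probability of the region $R = \{|Z| > 3.945\}$, where Z corresponds to the Wald statistic. Thus, the p-value is given by: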

$$ P\left( |Z| > 3.945 \right) = 2\cdot P\left(Z > 3.945 \right) = 2 \cdot \left( 1 - P(Z \leq 3.945) \right) = 2\cdot (1 - \Phi(3.945) ) $$

because, under the null hypothesis, the distribution of Z is approximately normal with mean 0 and variance 1 for a Wald test. For a standard normal distribution, this probability is given by:

$$ 2\cdot \left(1 - \int_{-\infty}^{3.945} \frac{1}{\sqrt{2\pi}} e^{ -\frac{x^2}{2} } dx \right) = 2\cdot(1-\Phi(3.945))  \approx 0.0000798 $$

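An approximate 95% confidence interval for the difference in means is given by $\overline{X} - \overline{Y} \pm 2\sqrt{ \frac{s_1^2}{8} + \frac{s_2^2}{10} }$, where the factor 2 approximates $z_{0.025} \approx 1.96$, and 8 and 10 are the sizes of the Twain and Snodgrass samples.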

In [None]:
# Point estimate: difference between the two sample means.
mean_diff = twain.mean() - snodgrass.mean()

# Half-width of the interval: twice the estimated standard error of the
# difference in means.
var_metric = 2 * np.sqrt(
    twain.var() / twain.size + snodgrass.var() / snodgrass.size
)
conf_interval = [mean_diff - var_metric, mean_diff + var_metric]

print(f'X_mean - Y_mean, with X being Twain and Snodgrass Y: {mean_diff}')
print(f'Sample variance metric: {var_metric}')
print(f'Confidence interval: {conf_interval}')

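The p-value indicates that the result is statistically significant, and the confidence interval, approximately $(0.011, 0.033)$, excludes zero. However, the interval contains only small differences. Since the estimated difference is so small in absolute terms, you could argue that it is not practically significant, i.e. that the authors aren't necessarily different people.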

## Part B


In [None]:
B = 10000  # number of random relabellings to draw
permute_space = np.concatenate((twain, snodgrass))
n_twain = twain.size  # size of the first group in every relabelling

# NOTE(review): the comparison below is one-sided (no absolute value), while
# the Wald p-value it is later compared with is two-sided — confirm this is
# intended before comparing the two numbers.
npr.seed(0)  # fixed seed so the estimate is reproducible
exceed_count = 0
for _ in range(B):
    permute = npr.permutation(permute_space)
    # 3.945 is the observed Wald statistic rounded to three decimals.
    if waldStat(permute[:n_twain], permute[n_twain:]) > 3.945:
        exceed_count += 1

# (1 + count) / (B + 1): the extra 1 keeps the estimate strictly positive.
# Dividing once avoids accumulating floating-point error over the loop.
p_val = (1 + exceed_count) / (B + 1)

print(p_val)

The p-value obtained by permutation testing is almost 10 times bigger than the Wald-test p-value calculated in Part A, so the evidence against the null hypothesis is weaker.

# Exercise 3

## Part A