In [38]:
import numpy as np
from scipy import stats

## Manual t-test

In [128]:
# Generate sample data: two independent groups of N draws each.
np.random.seed(555)
N = 10
a = np.random.randn(N) + 2  # mean is 2, std = 1
b = np.random.randn(N)  # mean is 0, std = 1


def two_sided_p_value(t_stat, dof):
    """Return the two-sided p-value of a Student's t statistic.

    Parameters
    ----------
    t_stat : float or array_like
        Observed t statistic(s); may be positive or negative.
    dof : float
        Degrees of freedom of the reference t distribution.

    Returns
    -------
    float or numpy.ndarray
        P(|T| >= |t_stat|) under the t distribution, always within [0, 1].
    """
    # abs(): `(1 - cdf(t)) * 2` is only valid for t >= 0; with a negative t
    # (e.g. the groups compared in the opposite order) it exceeds 1.
    # sf(): the survival function evaluates the upper tail directly instead
    # of subtracting a value close to 1 from 1, so the result may differ from
    # the `1 - cdf` form in the trailing digits.
    return 2 * stats.t.sf(np.abs(t_stat), df=dof)


# Sample variances.
# By default numpy divides by N (ddof=0), which is the maximum-likelihood
# (biased) estimate. These are samples, so we divide by N - 1 instead by
# setting ddof (delta degrees of freedom) to 1.
var_a = a.var(ddof=1)
var_b = b.var(ddof=1)

# Pooled standard deviation.
# The plain average of the two variances equals the pooled variance only
# because both groups have the same size N.
s = np.sqrt((var_a + var_b) / 2)

# t statistic, degrees of freedom and two-sided p-value
t = (a.mean() - b.mean()) / (s * np.sqrt(2.0 / N))
df = 2 * N - 2  # degrees of freedom
p = two_sided_p_value(t, df)

print(f"t: {t}")
print(f"p: {p}")

t: 3.8762918528620314
p: 0.001106351542229822


## Built-in t-test

In [127]:
# SciPy's two-sample t-test for independent samples; it assumes equal
# population variances by default, matching the pooled-variance setup.
t2, p2 = stats.ttest_ind(a, b)

# Report the statistic and the p-value, one per line.
print(f"t2: {t2}", f"p2: {p2}", sep="\n")

t2: 3.8762918528620314
p2: 0.0011063515422298506
