# Statistical Power

## 3.1 Test statistic under the alternative


##### Example 3.1

In [None]:
# Example 3.1: simulate effect estimates for null and non-null SNPs and compare
# the observed distributions of z and z^2 with their theoretical densities.
n = 500 # individuals
p = 5000 # SNPs simulated for both the null and the alternative
f = 0.50 # MAF
b.alt = 0.20 # effect size under the alternative hypothesis
x = rbinom(n, 2, f) # genotypes at 1 SNP for n individuals
y = scale(rnorm(n)) # random phenotype normalized to have sample sd = 1
# Pick the SE of the slope, and assume it stays constant and independent of beta
se = summary(lm(y ~ x))$coefficients[2, 2]
b.hat.null = rnorm(p, 0, se) # estimates under the null
b.hat.alt = rnorm(p, b.alt, se) # estimates under the alternative

par(mfrow = c(1, 2)) # two panels: z-scores first, then z^2

# Panel 1: observed densities of z-scores
plot(NULL, xlim = c(-3, 6), ylim = c(0, 0.5), xlab = "z",
     ylab = "density", col = "white") # empty panel for plotting
lines(density(b.hat.null / se), col = "black", lwd = 2) # Wald stat for null variants
lines(density(b.hat.alt / se), col = "red", lwd = 2) # Wald stat for alternative variants
# Add theoretical densities for z-scores
x.seq = seq(-3, 6, 0.01)
lines(x.seq, dnorm(x.seq, 0, 1), col = "blue", lty = 2) # for null
lines(x.seq, dnorm(x.seq, b.alt / se, 1), col = "orange", lty = 2) # for alternatives

# Panel 2: observed densities of z^2
plot(NULL, xlim = c(0, 35), ylim = c(0, 1), xlab = expression(z^2),
     ylab = "density", col = "white") # empty panel for plotting
lines(density((b.hat.null / se)^2), col = "black", lwd = 2) # chi-square stat for null variants
lines(density((b.hat.alt / se)^2), col = "red", lwd = 2) # chi-square stat for alternative variants
# Add theoretical densities of the chi-square distributions
x.seq = seq(0, 35, 0.01)
lines(x.seq, dchisq(x.seq, df = 1, ncp = 0), col = "blue", lty = 2) # NCP = 0 for null
lines(x.seq, dchisq(x.seq, df = 1, ncp = (b.alt / se)^2), col = "orange", lty = 2) # NCP = (beta/se)^2 for alternative
legend("topright", legend = c("NULL obs'd", "ALT obs'd", "NULL theor", "ALT theor"),
       col = c("black", "red", "blue", "orange"),
       lty = c(1, 1, 2, 2), lwd = c(2, 2, 1, 1))
# Add significance thresholds corresponding to 0.05 and 5e-8.
# By definition, the thresholds are always computed under the null.
q.thresh = qchisq(c(0.05, 5e-8), df = 1, ncp = 0, lower.tail = FALSE)
abline(v = q.thresh, col = c("darkgreen", "springgreen"), lty = 3)
text(q.thresh + 2, c(0.4, 0.4), c("P < 0.05", "P < 5e-8")) # labels just right of each threshold line

In [None]:
# Significance thresholds on the chi-square scale (computed under the null, NCP = 0)
# for P-value cutoffs 0.05 and 5e-8.
alphas = c(0.05, 5e-8)
q.thresh = qchisq(alphas, df = 1, ncp = 0, lower.tail = FALSE)
# Right-tail probabilities beyond those thresholds under the alternative,
# i.e. with NCP = (b.alt/se)^2; b.alt and se come from the Example 3.1 cell.
pchisq(q.thresh, df = 1, ncp = (b.alt/se)^2, lower.tail = FALSE)

## 3.2 Ingredients of power

##### Example 3.2

In [None]:
# Example 3.2: power at alpha = 5e-8 as a function of sample size n for a
# quantitative trait with sd = 1, given MAF f and effect size b.alt.
f = 0.5
b.alt = 0.2
sigma = sqrt(1 - 2*f*(1-f)*b.alt^2) # error sd after SNP effect is accounted for
ns = seq(500, 4000, 10) # candidate values for n
ses = sigma / sqrt(ns*2*f*(1-f)) # SEs corresponding to each candidate n
q.thresh = qchisq(5e-8, df = 1, ncp = 0, lower.tail = FALSE) # chi-square threshold corresponding to 5e-8
pwr = pchisq(q.thresh, df = 1, ncp = (b.alt/ses)^2, lower.tail = FALSE) # power at alpha=5e-8 for VECTOR of SE values
plot(ns, pwr, col = "darkgreen", xlab = "n", ylab = "power",
     main = paste0("QT sd=1; MAF=", f, "; beta =", b.alt), type = "l", lwd = 1.5)
abline(h = 0.90, lty = 2) # reference line at 90% power

In [None]:
# Output the first (smallest) candidate n that gives power >= 90%.
# which(...)[1] is NA when no candidate reaches 90%, so the result is then NA
# without the "no non-missing arguments to min" warning of min(which(...)).
ns[which(pwr >= 0.90)[1]]

In [None]:
# Value of 2 f (1-f) beta^2 for the current f and b.alt: the amount subtracted
# from the unit phenotypic variance when computing the error sd above.
var.explained = 2 * f * (1-f) * b.alt^2
var.explained

##### Example 3.3

In [None]:
# Example 3.3: power at alpha = 5e-8 as a function of n for a SNP that
# explains a fixed proportion (y.explained) of the variance of a QT with sd = 1.
f = 0.5
y.explained = 0.005
b.alt = sqrt(y.explained / (2*f*(1-f))) # this is the beta that explains 0.5%
sigma = sqrt(1 - y.explained) # error sd after SNP effect is accounted for
ns = seq(1000, 12000, 10) # candidate n
ses = sigma / sqrt(ns * 2 * f * (1-f)) # SE corresponding to each n
q.thresh = qchisq(5e-8, df = 1, ncp = 0, lower.tail = FALSE) # threshold corresponding to alpha = 5e-8
pwr = pchisq(q.thresh, df = 1, ncp = (b.alt/ses)^2, lower.tail = FALSE) # power at alpha = 5e-8
plot(ns, pwr, col = "darkgreen", xlab = "n", ylab = "power",
     main = paste0("QT sd=1; MAF=", f, "; beta=", b.alt), type = "l", lwd = 1.5)
abline(h = 0.90, lty = 2) # reference line at 90% power

In [None]:
# Output the first (smallest) candidate n that gives power >= 90%.
# which(...)[1] is NA when no candidate reaches 90%, so the result is then NA
# without the "no non-missing arguments to min" warning of min(which(...)).
ns[which(pwr >= 0.90)[1]]

##### Example 3.6

In [None]:
# Example 3.6: tabulate n, phi and the effective sample size n * phi * (1 - phi)
# for two studies.
n = c(500000, 60000) # total sample sizes
# NOTE(review): c(15000, 30000) are presumably the numbers of cases, making phi
# the proportion of cases -- confirm against the course text.
phi = c(15000, 30000) / n
eff.n = n*phi*(1-phi)
cbind(n, phi, eff.n)

##### Example 3.7

In [None]:
# Example 3.7: power at alpha = 5e-8 with
# NCP = 2 f (1-f) * n * phi * (1-phi) * b^2.
b = log(1.1) # b is log-odds, approximately GRR for a low prevalence disease
n = 20000
f = 0.40
phi = 0.50
# TRUE/FALSE and full argument names are spelled out: T/F are ordinary
# variables that can be reassigned, and partial matching of 'lower' is fragile.
pchisq(qchisq(5e-8, df = 1, lower.tail = FALSE), df = 1,
       ncp = 2*f*(1-f)*n*phi*(1-phi)*b^2, lower.tail = FALSE)

## 3.3 Why are well-powered studies so important?

In [None]:
# Read the SZ results table (whitespace-separated text with a header row)
# straight from the course web page; as.is = TRUE keeps character columns as
# character vectors instead of converting them to factors.
sz.url = "http://www.mv.helsinki.fi/home/mjxpirin/GWAS_course/material/sz_res.txt"
sz.res = read.table(sz.url, header = TRUE, as.is = TRUE)
sz.res[1, ] # show the first row to see which columns are available

In [None]:
# Plot the known SZ variants on frequency - effect size coordinates
# and draw power curves at the genome-wide significance threshold.
maf = sz.res[, "Frq_control"] # not yet MAF but allele 1 frequency
maf[maf > 0.50] = 1 - maf[maf > 0.50] # turn it into MAF: never above 0.50
b = abs(log(sz.res[, "Combined_OR"])) # effect size on log-odds-ratio scale with positive sign
pw.thresh = 0.50 # target power for the curves
p.threshold = 5e-8 # significance threshold

# Set the margins BEFORE plot(): changing 'mar' once the plot exists moves the
# plot region, so the curves and legend added below would no longer line up
# with the points and axes already drawn.
par(mar = c(6, 6, 5, 1))
plot(maf, b, ylim = c(0, 0.3), xlim = c(0.01, 0.50), xlab = "MAF",
     ylab = "EFFECT SIZE (in log-odds-ratio)", xaxt = "n", yaxt = "n",
     log = "x", # logarithmic x-axis
     main = substitute(paste("Power = ", pw.thresh, " at ", alpha, " = ", p.threshold),
                       list(pw.thresh = pw.thresh, p.threshold = p.threshold)),
     cex.main = 1.8, cex.lab = 1.3, pch = 19)
# Axes were suppressed above (xaxt/yaxt = "n") so that ticks can be placed by hand
axis(1, at = c(0.01, 0.02, 0.05, 0.10, 0.25, 0.50), cex.axis = 1.3)
axis(2, at = c(0.00, 0.05, 0.10, 0.15, 0.20, 0.25, 0.30), cex.axis = 1.3)
grid()

q = qchisq(p.threshold, df = 1, lower.tail = FALSE) # chi-square value corresponding to the significance threshold
# matrix of numbers of cases (col 1) and controls (col 2), one sample-size set per row
Ns = matrix(c(3332, 3587, 10000, 10000, 34000, 45600), ncol = 2, byrow = TRUE)
cols = c("green", "cyan", "blue") # one curve colour per row of Ns

f = seq(0.01, 0.50, length.out = 200) # candidate allele frequencies
b = seq(0, 0.30, length.out = 200) # candidate effect sizes (replaces the observed b, which is already plotted)
legends = c()
for (set in seq_len(nrow(Ns))) {
    # cases * controls / total; does not depend on f, so compute it once per set
    n.eff = Ns[set, 1] * Ns[set, 2] / sum(Ns[set, ])
    b.for.f = rep(NA_real_, length(f)) # for each f, the first candidate b whose power exceeds the target
    for (i in seq_along(f)) {
        # power at every candidate b for this f
        pw = pchisq(q, df = 1, ncp = n.eff * 2 * f[i] * (1 - f[i]) * b^2, lower.tail = FALSE)
        # which(...)[1] is NA (without a warning) when no candidate b is powerful
        # enough; lines() then leaves a gap in the curve at that f.
        b.for.f[i] = b[which(pw > pw.thresh)[1]]
    }
    lines(f, b.for.f, type = "l", col = cols[set], lwd = 1.6)
    legends = c(legends, paste(Ns[set, ], collapse = "/")) # make a "#cases/#controls" tag for the legend
}
legend("bottomleft", lty = 1, col = cols, legend = legends, lwd = 2, cex = 1.3)