# Statistical Power

In [None]:
using Distributions
using DataFrames
using GLM
using Plots, Measures, StatsPlots
using Printf

**Utility functions**
More efficient implementations of some of these may exist in other packages, but to keep
this notebook simple and self-contained, crude implementations are given here.

In [None]:
"""
    scale_data(d::AbstractArray)

Counterpart of R's `scale()`: subtract the mean and divide by the standard deviation,
both taken along the first dimension of `d` (i.e. per column for a matrix, over all
elements for a vector). The result has the same shape as `d`.
"""
function scale_data(d::AbstractArray)
    center = mean(d, dims=1)  # mean along the first dimension
    spread = std(d, dims=1)   # standard deviation along the first dimension
    return (d .- center) ./ spread
end

## 3.1 Test statistic under the alternative

##### Example 3.1

In [None]:
# Example 3.1: simulate effect estimates under the null and under an alternative,
# then compare the observed distributions of the Wald statistic (z) and of z²
# with their theoretical counterparts.

# --- Simulation settings ---
n = 500 # individuals
p = 5000 # SNPs for both null and alternative
f = 0.50 # MAF
b_alt = 0.20 # effect size under the alternative hypothesis

# --- Simulate one SNP and take the standard error of its estimated effect ---
x = rand(Binomial(2, f), n) # genotypes at 1 SNP for n individuals
y = scale_data(randn(n)) # random phenotype normalized to have SD = 1
# [2] picks the SE of the coefficient of x (the first entry belongs to the intercept)
se = stderror(lm(@formula(y ~ x), DataFrame("x"=>x, "y"=>y, copycols=false)))[2]
b_hat_null = se * randn(p) # estimates under the null
b_hat_alt = b_alt .+ se * randn(p) # estimates under the alternative

fig = plot(layout=(1, 2), size=(800, 300), margin=7mm)

# --- Panel 1: z-scores ---
plot!(fig, subplot=1, xaxis=("z", (-3, 6)), yaxis=("density", (0, 0.5))) # Empty panel for plotting
# draw observed densities of z-scores
density!(subplot=1, b_hat_null/se, lc=:black, lw=2, label=nothing) # Wald stat for null variants
density!(subplot=1, b_hat_alt/se, lc=:red, lw=2, label=nothing) # Wald stat for alternative variants
# Let's add theoretical densities
plot!(subplot=1, Normal(0, 1), lc=:blue, ls=:dash, label=nothing) # for null
plot!(subplot=1, Normal(b_alt/se, 1), lc=:orange, ls=:dash, label=nothing) # for alternative

# --- Panel 2: squared z-scores (chi-square scale) ---
plot!(fig, subplot=2, xaxis=("z²", (-2, 35)), yaxis=("density", (0, 1))) # Empty panel for plotting
density!(subplot=2, (b_hat_null/se).^2, lc=:black, lw=2, label="NULL Obs'd")
density!(subplot=2, (b_hat_alt/se).^2, lc=:red, lw=2, label="ALT Obs'd")
plot!(subplot=2, Chisq(1), lc=:blue, ls=:dash, label="NULL Theor")
plot!(subplot=2, NoncentralChisq(1, (b_alt/se)^2), lc=:orange, ls=:dash, label="ALT Theor") # NCP = (beta/se)^2
# Let's add significance thresholds corresponding to 0.05 and 5e-8
# By definition, the thresholds are always computed under the null
# `quantile` is broadcast over the probabilities: the vectorized form
# `quantile(d, ::AbstractArray)` is deprecated in Distributions.jl.
q_thresh = quantile.(Chisq(1), 1 .- [0.05, 5e-8]) # For upper quantile, subtract from 1.0
# q_thresh' is a 1×2 row, so each threshold becomes its own series with its own colour
vline!(subplot=2, q_thresh', ls=:dot,  lc=[:darkgreen :springgreen], label=nothing)
annotate!(subplot=2,
    (q_thresh[1] + 4, 0.40, text("P < 0.05", :right, 8)),
    (q_thresh[2] + 4, 0.40, text("P < 5e-8", :right, 8))
)

In [None]:
# Power at each significance threshold: the probability that a noncentral chi-square
# statistic (alternative hypothesis) exceeds the threshold computed under the null.
# `quantile` and `ccdf` are broadcast with dot syntax because the vectorized forms
# `quantile(d, ::AbstractArray)` / `ccdf(d, ::AbstractArray)` are deprecated in Distributions.jl.
q_thresh = quantile.(Chisq(1), 1.0 .- [0.05, 5e-8]) # repeating the thresholds on the chi-square scale
ccdf.(NoncentralChisq(1, (b_alt/se)^2), q_thresh) # right-tail probabilities beyond each threshold