# 1. What is GWAS?

In [None]:
using DataFrames
using Distributions
using HypothesisTests
using Plots
using StatsPlots
using Printf

## Genetic variation

### Genotypes and Hardy-Weinberg equilibrium

##### Example 1.1

In [None]:
# Observed counts of genotypes 0, 1 and 2
geno = [66, 29, 4]
n = sum(geno) # number of individuals
# Allele frequency: genotype-weighted count divided by the 2n alleles in the sample,
# i.e. (66*0 + 29*1 + 4*2) / (2 * (66+29+4))
f = sum(copies * cnt for (copies, cnt) in zip(0:2, geno)) / (2 * n)

In [None]:
# Genotype frequencies expected under Hardy-Weinberg equilibrium (HWE)
# for allele frequency f: (1-f)^2, 2f(1-f), f^2
hwe_prop = let q = 1 - f
    [q^2, 2 * f * q, f^2]
end
# Show the observed genotype frequencies next to the HWE expectations
DataFrame(obs = geno ./ n, hwe = hwe_prop)

In [None]:
# HWE is tested with a chi-square test, even though the counts in the last row
# are quite small for that approximation.
hwe_test = let expected = n * hwe_prop
    sum(@. (geno - expected)^2 / expected)
end # test statistic: sum over genotypes of (observed - expected)^2 / expected
hwe_p = ccdf(Chisq(1), hwe_test) # P-value: upper tail of Chisq(1) at the statistic

# Bar chart of the observed genotype counts; the HWE P-value goes in the title
bar(
    0:2,
    geno;
    title=@sprintf("rs429358 FIN in 1000G Phase3; HWE P = %.3f", hwe_p),
    xaxis=("genotype", 0:2),
    color="skyblue",
    leg=false,
)

##### Synthetic Data

In [None]:
# NOTE: this rebinds the global `n` (until now the number of individuals in `geno`)
# to the size of the simulated samples; the cells below use this new value.
n = 1000

# Draw n genotypes with replacement, weighted by the observed genotype counts
sample_from_geno = wsample(0:2, geno, n; replace=true)
# Vector of occurrences: how many times each genotype 0, 1, 2 was drawn
counts_from_geno = [count(==(g), sample_from_geno) for g in 0:2]

# Draw n genotypes from Binomial(2, f) and tally them the same way
sample_from_hwe = rand(Binomial(2, f), n)
counts_from_hwe = [count(==(g), sample_from_hwe) for g in 0:2]

# Sampled genotype proportions from the two schemes, side by side
DataFrame(geno = counts_from_geno ./ n, hwe = counts_from_hwe ./ n)

In [None]:
# Stacked horizontal bars of the simulated genotype proportions: one bar for the
# sample drawn from the observed counts ("geno"), one for the Binomial(2, f) sample ("HWE").
# Both count vectors are reversed first and only then transposed into rows, so the two
# rows are built the same way and no matrix is reversed without `dims` (which older
# Julia versions reject).
groupedbar(
    ["geno", "HWE"],
    [reverse(counts_from_geno)' / n; reverse(counts_from_hwe)' / n];
    orientation=:h,
    bar_position=:stack,
    legend=false,
    xticks=0:0.2:1,
)

In [None]:
# Sampled genotype proportions with equal-tailed 95% intervals. For a count k out of n,
# the bounds are the 2.5% and 97.5% quantiles of Beta(k + 0.5, n - k + 0.5).
df_conf_int = let
    # Quantile p of Beta(k + 0.5, n - k + 0.5) for a single count k
    beta_quantile(k, p) = quantile(Beta(k + 0.5, n - k + 0.5), p)
    DataFrame(
        geno = counts_from_geno ./ n,
        geno_lower = beta_quantile.(counts_from_geno, 0.025),
        geno_upper = beta_quantile.(counts_from_geno, 0.975),
        hwe = counts_from_hwe ./ n,
        hwe_lower = beta_quantile.(counts_from_hwe, 0.025),
        hwe_upper = beta_quantile.(counts_from_hwe, 0.975),
    )
end

In [None]:
# Chi-square test on the 3x2 table of counts: rows are genotypes 0, 1, 2;
# columns are the two simulated samples (from observed counts, from Binomial(2, f))
ChisqTest(hcat(counts_from_geno, counts_from_hwe))