### BIOSTAT 257: HW 5

In [1]:
# load necessary packages; make sure install them first
using BenchmarkTools, CSV, DataFrames, DelimitedFiles, Distributions
using Ipopt, LinearAlgebra, MathOptInterface, MixedModels, NLopt
using PrettyTables, Random, RCall

const MOI = MathOptInterface

MathOptInterface

In [17]:
#import Pkg; Pkg.add("RCall")

[32m[1m   Resolving[22m[39m package versions...
[32m[1m   Installed[22m[39m CategoricalArrays ─ v0.10.6
[32m[1m    Updating[22m[39m `~/.julia/environments/v1.7/Project.toml`
 [90m [6f49c342] [39m[92m+ RCall v0.13.13[39m
[32m[1m    Updating[22m[39m `~/.julia/environments/v1.7/Manifest.toml`
 [90m [324d7699] [39m[92m+ CategoricalArrays v0.10.6[39m
 [90m [6f49c342] [39m[92m+ RCall v0.13.13[39m
 [90m [1b915085] [39m[92m+ WinReg v0.3.1[39m
[32m[1mPrecompiling[22m[39m project...
[32m  ✓ [39m[90mWinReg[39m
[32m  ✓ [39m[90mCategoricalArrays[39m
[32m  ✓ [39mRCall
  3 dependencies successfully precompiled in 7 seconds (308 already precompiled)


#### Q1. (Optional, 30 bonus pts) Derivatives

1. Prove the following derivatives:

- $\nabla_\boldsymbol{\beta} \ell_i (\boldsymbol{\beta}, \mathbf{L}, \sigma^2) = \mathbf{X_i}^{T} \mathbf{\Omega_i}^{-1}\mathbf{r_i}$,
- $\nabla_{\sigma^2} \ell_i (\boldsymbol{\beta}, \mathbf{L}, \sigma^2) = -\frac{1}{2} tr(\mathbf{\Omega_i}^{-1}) + \frac{1}{2}\mathbf{r_i^{T}\Omega_i^{-2}r_i}$,
-

#### Q2. (20 pts) Objective and gradient evaluator for a single datum

We expand the code from HW3 to evaluate both objective and gradient. I provide my code for HW3 below as a starting point. You do not have to use this code. If your come up faster code, that's even better.

In [146]:
# define a type that holds an LMM datum
struct LmmObs{T <: AbstractFloat}
    # data
    y          :: Vector{T}
    X          :: Matrix{T}
    Z          :: Matrix{T}
    # arrays for holding gradient
    ∇β         :: Vector{T}
    ∇σ²        :: Vector{T}
    ∇Σ         :: Matrix{T}    
    # working arrays
    # TODO: whatever intermediate arrays you may want to pre-allocate
    yty        :: T
    xty        :: Vector{T}
    zty        :: Vector{T}
    storage_p  :: Vector{T}
    storage_q  :: Vector{T}
    storage_q2 :: Vector{T}
    xtx        :: Matrix{T}
    ztx        :: Matrix{T}
    ztz        :: Matrix{T}
    xtz        :: Matrix{T} # added by me
    storage_qq :: Matrix{T}
    storage_qp :: Matrix{T}
end

"""
    LmmObs(y::Vector, X::Matrix, Z::Matrix)

Create an LMM datum of type `LmmObs`.
"""
function LmmObs(
        y::Vector{T}, 
        X::Matrix{T}, 
        Z::Matrix{T}
    ) where T <: AbstractFloat
    n, p, q    = size(X, 1), size(X, 2), size(Z, 2)    
    ∇β         = Vector{T}(undef, p)
    ∇σ²        = Vector{T}(undef, 1)
    ∇Σ         = Matrix{T}(undef, q, q)    
    yty        = abs2(norm(y))
    xty        = transpose(X) * y
    zty        = transpose(Z) * y    
    storage_p  = Vector{T}(undef, p)
    storage_q  = Vector{T}(undef, q)
    storage_q2 = Vector{T}(undef, q)
    xtx        = transpose(X) * X
    ztx        = transpose(Z) * X
    ztz        = transpose(Z) * Z
    xtz        = Matrix{T}(undef, n, q)
    storage_qq = similar(ztz)
    storage_qp = similar(ztx)
    LmmObs(y, X, Z, ∇β, ∇σ², ∇Σ, 
        yty, xty, zty, storage_p, storage_q,
        storage_q2, xtx, ztx, ztz, storage_qq, storage_qp)
end

"""
    logl!(obs::LmmObs, β, L, σ², needgrad=false)

Evaluate the log-likelihood of a single LMM datum at parameter values `β`, `L`, 
and `σ²`. If `needgrad==true`, then `obs.∇β`, `obs.∇Σ`, and `obs.σ² are filled 
with the corresponding gradient.
"""
function logl!(
        obs      :: LmmObs{T}, 
        β        :: Vector{T}, 
        L        :: Matrix{T}, 
        σ²       :: T,
        needgrad :: Bool = true
    ) where T <: AbstractFloat
    n, p, q = size(obs.X, 1), size(obs.X, 2), size(obs.Z, 2)
    ####################
    # Evaluate objective
    ####################    
    # form the q-by-q matrix: M = σ² * I + Lt Zt Z L
    copy!(obs.storage_qq, obs.ztz)
    BLAS.trmm!('L', 'L', 'T', 'N', T(1), L, obs.storage_qq) # O(q^3)
    BLAS.trmm!('R', 'L', 'N', 'N', T(1), L, obs.storage_qq) # O(q^3)
    @inbounds for j in 1:q
        obs.storage_qq[j, j] += σ²
    end
    # cholesky on M = σ² * I + Lt Zt Z L
    LAPACK.potrf!('U', obs.storage_qq) # O(q^3)
    # storage_q = (Mchol.U') \ (Lt * (Zt * res))
    BLAS.gemv!('N', T(-1), obs.ztx, β, T(1), copy!(obs.storage_q, obs.zty)) # z'y - z'xβ
    BLAS.trmv!('L', 'T', 'N', L, obs.storage_q)    # L'(z'y - z'xβ)
    BLAS.trsv!('U', 'T', 'N', obs.storage_qq, obs.storage_q) # A^{-1} L'(z'y - z'xβ)
    # l2 norm of residual vector
    copy!(obs.storage_p, obs.xty)
    rtr  = obs.yty +
        dot(β, BLAS.gemv!('N', T(1), obs.xtx, β, T(-2), obs.storage_p))
    # assemble pieces
    logl::T = n * log(2π) + (n - q) * log(σ²) # constant term
    @inbounds for j in 1:q
        logl += 2log(obs.storage_qq[j, j])
    end
    qf    = abs2(norm(obs.storage_q)) # quadratic form term
    logl += (rtr - qf) / σ² 
    logl /= -2
    ###################
    # Evaluate gradient
    ###################    
    if needgrad
        # TODO: fill ∇β, ∇L, ∇σ² by gradients
        sleep(1e-3) # pretend this step takes 1ms
    end    
    ###################
    # Return
    ###################        
    return logl 
end

logl!

In [147]:
Random.seed!(257)
# dimension
n, p, q = 2000, 5, 3
# predictors
X  = [ones(n) randn(n, p - 1)]
Z  = [ones(n) randn(n, q - 1)]
# parameter values
β  = [2.0; -1.0; rand(p - 2)]
σ² = 1.5
Σ  = fill(0.1, q, q) + 0.9I # compound symmetry 
L  = Matrix(cholesky(Symmetric(Σ)).L)
# generate y
y  = X * β + Z * rand(MvNormal(Σ)) + sqrt(σ²) * randn(n)

# form the LmmObs object
obs = LmmObs(y, X, Z);

#### TESTING THE PARTS OF MY CODE 

In [180]:
logl!(obs, β, L, σ²)

-3256.179335805826

In [181]:
BLAS.trsv!('L', 'T', 'N', obs.storage_qq, obs.storage_q)

3-element Vector{Float64}:
 -11.359497761476838
   3.75738065598062
  -1.4656050135640222

In [176]:
inv(transpose(obs.storage_qq))*L*obs.storage_q # check

3-element Vector{Float64}:
 -9.283241678965894
  2.603352805336954
 -0.6995800464625577

In [184]:
BLAS.trmv!('L', 'N', 'N', L, obs.storage_q)

3-element Vector{Float64}:
 -11.359497761476838
   2.6025967729804638
  -2.2483021205744897

In [182]:
BLAS.gemv!('N', AbstractFloat(-1), obs.xtz, 

3×3 Matrix{Float64}:
 1.0  0.0        0.0
 0.1  0.994987   0.0
 0.1  0.0904534  0.990867