In [None]:
library(HMM)
library(mclust)
library(mvtnorm)

In [None]:
# Inspect the faithful dataset: its dimensions, then its first rows.
dim(faithful)
head(faithful)

In [None]:
mygauss <- function(data, mean, Sigma) {
    # Multivariate normal density evaluated at every row of `data`.
    #
    # data:  n-by-p matrix (or data frame) with one observation per row.
    # mean:  length-p mean vector.
    # Sigma: p-by-p covariance matrix.
    # Returns a length-n vector of density values.
    Sigma <- as.matrix(Sigma)
    p <- nrow(Sigma)

    # p-by-n matrix of centred observations; `mean` (length p) is recycled
    # down each column, so every observation has the mean subtracted.
    centered <- t(as.matrix(data)) - as.numeric(mean)

    # Quadratic form (x - mean)' Sigma^{-1} (x - mean) for each observation.
    # solve(Sigma, centered) avoids forming the inverse explicitly and also
    # works when p == 1, where diag() on a scalar would not build a 1x1 matrix.
    quad <- colSums(centered * solve(Sigma, centered))

    # Normalising constant is (2*pi)^(p/2) * sqrt(det(Sigma)).
    exp(-0.5 * quad) / sqrt((2 * pi)^p * det(Sigma))
}

Estep <- function(data, G, para) {
    # E-step: posterior membership probabilities for each observation.
    #
    # data: n-by-p matrix of observations.
    # G:    number of mixture components.
    # para: list holding prob (mixing weights), mean (one column per
    #       component) and Sigma (a single covariance used for all components).
    # Returns the n-by-G matrix whose rows sum to one.
    weights <- para$prob
    centers <- para$mean
    shared_cov <- para$Sigma

    resp <- matrix(0, nrow(data), G)
    for (comp in 1:G) {
        # Unnormalised posterior: mixing weight times component density.
        resp[, comp] <- weights[comp] * mygauss(data, centers[, comp], shared_cov)
    }

    # Dividing by the length-n vector of row totals normalises each row.
    resp / rowSums(resp)
}

Mstep <- function(data, G, para, post.prob) {
    # M-step: update mixing weights, component means and the pooled
    # covariance shared by all components.
    #
    # data:      n-by-p matrix of observations.
    # G:         number of mixture components.
    # para:      parameter list; its prob, mean and Sigma entries are replaced.
    # post.prob: n-by-G matrix of posterior membership probabilities.
    # Returns the updated parameter list.
    data <- as.matrix(data)
    post.prob <- as.matrix(post.prob)
    n_obs <- nrow(data)
    p <- ncol(data)
    comp_mass <- colSums(post.prob)

    para$prob <- comp_mass / n_obs

    # p-by-G matrix of weighted means: column k is sum_i z_ik x_i / sum_i z_ik.
    mu <- t(t(t(data) %*% post.prob) / comp_mass)
    para$mean <- mu

    # Pooled weighted scatter: sum over components and observations of
    # z_ik (x_i - mu_k)(x_i - mu_k)'.
    scatter <- matrix(0, p, p)
    for (k in seq_len(G)) {
        centered <- t(data) - mu[, k]
        # Scaling the rows of t(centered) by z_ik avoids building an n-by-n
        # diagonal weight matrix.
        scatter <- scatter + centered %*% (post.prob[, k] * t(centered))
    }

    # The posterior weights sum to the number of observations, so that is the
    # divisor for the shared covariance (not a per-component mass times p).
    para$Sigma <- scatter / n_obs

    para
}

loglik <- function(data, G, para) {
    # Placeholder for the log-likelihood computation: for now it only hands
    # back whatever is stored in para$loglik; data and G are not used.
    para$loglik
}

myEM <- function(data, itmax, G, para) {
  # Run a fixed number of EM iterations.
  #
  # data:  observations, coerced to a matrix.
  # itmax: number of iterations
  # G:     number of components
  # para:  list of (prob, mean, Sigma, loglik)
  # Returns the parameter list after the last iteration.
  obs <- as.matrix(data)

  # seq_len() gives an empty loop for itmax = 0, where 1:itmax would run
  # twice (for 1 and 0).
  for (iter in seq_len(itmax)) {
    print(iter)  # progress indicator
    post.prob <- Estep(obs, G, para)
    para <- Mstep(obs, G, para, post.prob)
  }

  # NOTE: para$loglik is not updated here; the loglik() call is still a stub.

  para
}

In [None]:
# Random hard initialisation: every observation is drawn into one of G groups.
G <- 2
n <- nrow(faithful)
set.seed(7568)  # seed: last four digits of the University ID
gID <- sample(1:G, n, replace = TRUE)
# One-hot membership matrix: Z[i, k] is 1 when observation i is in group k.
Z <- matrix(0, n, G)
Z[cbind(seq_len(n), gID)] <- 1
ini0 <- mstep(data = faithful, modelName = "EEE", z = Z)$parameters

In [None]:
# Starting parameters for myEM(), taken from the initial mstep() result;
# loglik is left unset.
para0 <- list(prob = ini0$pro, 
              mean = ini0$mean,
              Sigma = ini0$variance$Sigma, 
              loglik = NULL)

In [None]:
# Run 20 EM iterations from the starting parameters. The first argument is
# spelled out as `data` rather than relying on partial matching of `d`.
myEM(data = faithful, itmax = 20, G = G, para = para0)

In [None]:
# Test E step: call mclust's estep() with the initial parameters (ini0) and
# show its posterior matrix ($z) as a reference for the hand-written Estep().
para_test <- list(pro = ini0$pro,
                  mean = ini0$mean,
                  variance = ini0$variance,
                  Vinv = NULL)
estep(data=faithful, modelName="EEE", parameters=para_test)$z

In [None]:
# Hand-written E-step on the starting parameters; display the posterior matrix.
G <- 2
d <- as.matrix(faithful)
post.prob <- Estep(d, G, para0)
post.prob

In [None]:
# Test MStep: reference M-step from mclust, fed the post.prob matrix, for
# comparison with the hand-written Mstep().
testm_res <- mstep(data=faithful, modelName="EEE", z = post.prob)$parameters

In [None]:
# Hand-written M-step on the same posterior matrix; display the updated parameters.
para <- Mstep(d, G, para0, post.prob)
para

In [None]:
# Covariance from the mstep() result, shown for comparison with para$Sigma.
testm_res$variance$Sigma