In [95]:
library(HMM)
library(mclust)

In [96]:
dim(faithful)
head(faithful)

Unnamed: 0_level_0,eruptions,waiting
Unnamed: 0_level_1,<dbl>,<dbl>
1,3.6,79
2,1.8,54
3,3.333,74
4,2.283,62
5,4.533,85
6,2.883,55


In [97]:
mygauss <- function(data, mean, Sigma){
    p = nrow(Sigma)
    svd_obj = svd(Sigma)
    D = svd_obj$d
    U = svd_obj$u
    V = svd_obj$v
    Dt = diag(1/sqrt(D))
    A = t(data)
    Sinv = U %*% Dt^2 %*% t(U) 

    #xt = Dt %*% t(U) %*% data
    #mt = Dt %*% t(U) %*% mean
    #a = t(xt-mt) %*% Sinv %*% (xt-mt)
    a = (A - mean) * (Sinv %*% (A - mean))
    a = colSums(a)
    return (exp(-0.5 * a)/((2*pi)^p * (det(Sigma))^0.5))
}

Estep <- function(data, G, para){
    # Your Code
    prob <- para$prob
    mu <- para$mean
    Sigma <- para$Sigma
    loglik <- para$loglik
    n <- nrow(data)
    # need to come up with a way to evalue multivariate gaussian distrbutions,
    # but let's focus on getting the algorithm correct right now.
    # assume we have G means, G Sigmas, G pis
    p = matrix(0, n, G)
    for (k in 1:G){
        p[,k] = prob[k] * mygauss(data, mu[,k], Sigma)
    }
    #p = prob * mygauss(data, mu, Sigma)
    s = rowSums(p)
    p = p / s
  
    # Return the n-by-G probability matrix
    return(p)
  }

Mstep <- function(data, G, para, post.prob){ 
    # Your Code
    # Return the updated parameters
    para$prob = colSums(post.prob) / nrow(post.prob)
    mu = t(data) %*% post.prob
    #mu_sum = colSums(mu)
    mu_sum = colSums(post.prob)
    n = nrow(mu)
    for (k in 1:n){
        mu[k,] = mu[k,] / mu_sum
    }
    para$mean = mu
    Sigma = matrix(0, n, n)
    # This loop is giving the first step Sigma very close agreement with the 
    # R method, but not exact agreement. 
    for (k in 1:G) {
        x = t(data) - mu[,k]
        P = diag(post.prob[,k])
        Sigma = Sigma + x %*% P %*% t(x)   
    }
    
    para$Sigma = Sigma / (colSums(post.prob) * n)
    
    return (para)
  }

loglik <- function(data, G, para){
    # compute loglikelihood
    ll = para$loglik
    
    return (ll)
}

myEM <- function(data, itmax, G, para){
  # itmax: number of of iterations
  # G:     number of components
  # para:  list of (prob, mean, Sigma, loglik)
  d = as.matrix(data)
  for(t in 1:itmax){
    print(t)
    #print(para$Sigma)
    post.prob <- Estep(d, G, para)
    #print(post.prob)
    para <- Mstep(d, G, para, post.prob)
  }
  
  # update para$loglik   
  #para[4] = loglik(data, G, para)
  
  return(para)
}

In [98]:
n <- nrow(faithful)
G <- 2
set.seed(7568)  # replace 234 by the last 4-dig of your University ID
gID <- sample(1:G, n, replace = TRUE)
Z <- matrix(0, n, G)
for(k in 1:G)
  Z[gID == k, k] <- 1 
ini0 <- mstep(modelName="EEE", faithful , Z)$parameters

In [99]:
para0 <- list(prob = ini0$pro, 
              mean = ini0$mean,
              Sigma = ini0$variance$Sigma, 
              loglik = NULL)

In [100]:
myEM(d=faithful, itmax=20, G=G, para=para0)

[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11
[1] 12
[1] 13
[1] 14
[1] 15
[1] 16
[1] 17
[1] 18
[1] 19
[1] 20


0,1,2
eruptions,4.29694,2.046786
waiting,80.04005,54.614647

Unnamed: 0,eruptions,waiting
eruptions,0.1030181,0.5866558
waiting,1.044753,49.0457998


In [101]:
# Test E step
para_test <- list(pro = ini0$pro,
                  mean = ini0$mean,
                  variance = ini0$variance,
                  Vinv = NULL)
estep(data=faithful, modelName="EEE", parameters=para_test)$z

0,1,2
1,0.4783272,0.5216728
2,0.4464928,0.5535072
3,0.4819229,0.5180771
4,0.4467317,0.5532683
5,0.5398228,0.4601772
6,0.5561139,0.4438861
7,0.5384024,0.4615976
8,0.4398103,0.5601897
9,0.4818333,0.5181667
10,0.5202056,0.4797944


In [102]:
d = as.matrix(faithful)
G = 2
post.prob <- Estep(d, G, para0)
post.prob

0,1
0.4783272,0.5216728
0.4464928,0.5535072
0.4819229,0.5180771
0.4467317,0.5532683
0.5398228,0.4601772
0.5561139,0.4438861
0.5384024,0.4615976
0.4398103,0.5601897
0.4818333,0.5181667
0.5202056,0.4797944


In [103]:
# Test MStep
testm_res <- mstep(data=faithful, modelName="EEE", z = post.prob)$parameters

In [104]:
para <- Mstep(d, G, para0, post.prob)
para

0,1,2
eruptions,3.58252,3.385804
waiting,71.48603,70.263065

Unnamed: 0,eruptions,waiting
eruptions,1.242539,13.37405
waiting,14.396295,190.79367


In [105]:
testm_res$variance$Sigma

Unnamed: 0,eruptions,waiting
eruptions,1.288278,13.86636
waiting,13.866356,183.77041


# Problem 2

In [324]:
myBW = function(x, para, n.iter = 100){
  # Input:
  # x: T-by-1 observation sequence
  # para: initial parameter value
  # Output updated para value (A and B; we do not update w)
  
  for(i in 1:n.iter){
    para = BW.onestep(x, para)
  }
  return(para)
}

In [379]:
BW.onestep = function(x, para){
    # Input: 
    # x: T-by-1 observation sequence
    # para: mx, mz, and current para values for
    #    A: initial estimate for mz-by-mz transition matrix
    #    B: initial estimate for mz-by-mx emission matrix
    #    w: initial estimate for mz-by-1 initial distribution over Z_1
    # Output the updated parameters after one iteration
    # We DO NOT update the initial distribution w

    T = length(x)
    mz = para$mz
    mx = para$mx
    A = para$A
    B = para$B
    w = para$w
    alp = forward.prob(x, para)
    beta = backward.prob(x, para)
    

    myGamma = array(0, dim=c(mz, mz, T-1))
    myGamma_i = matrix(0, T, mz)
    #######################################
    ## YOUR CODE: 
    ## Compute gamma_t(i,j) P(Z[t] = i, Z[t+1]=j), 
    for (t in 1:(T-1)) {
        for (i in 1:mz) {
          for (j in 1:mz) {
            myGamma[i, j, t] = alp[t,i] * A[i,j] * B[j, x[t+1]] * beta[(t+1),j]
          }
        }
      }
    
    for (t in 1:(T-1)){
        for (i in 1:mz){
            myGamma_i[t,i] = sum(myGamma[i,,t] )
        }
    }

    for (i in 1:mz){
            myGamma_i[T,i] = sum(myGamma[,i,T-1])
        }
    

    ## which are stored in an array, myGamma
    #######################################

    # M-step for parameter A
    #######################################
    ## YOUR CODE: 
    
    A = apply(myGamma, MARGIN=c(1,2), sum)
    A = A/rowSums(A)
    
    #######################################

    # M-step for parameter B
    #######################################
    ## YOUR CODE: 

    B[] = 0
    for (i in 1:mz) {
        for (l in 1:mx) {
          idx = which(x == l)
          B[i,l] = sum(myGamma_i[idx,i]) / sum(myGamma_i[,i])
        }
      }

    #######################################

    para$A = A
    para$B = B
    return(para)
}

In [380]:
forward.prob = function(x, para){
  # Output the forward probability matrix alp 
  # alp: T by mz, (t, i) entry = P(x_{1:t}, Z_t = i)
  T = length(x)
  mz = para$mz
  A = para$A
  B = para$B
  w = para$w
  alp = matrix(0, T, mz)
  
  # fill in the first row of alp
  alp[1, ] = w * B[, x[1]]
  # Recursively compute the remaining rows of alp
  for(t in 2:T){
    tmp = alp[t-1, ] %*% A
    alp[t, ] = tmp * B[, x[t]]
    }
  return(alp)
}

backward.prob = function(x, para){
  # Output the backward probability matrix beta
  # beta: T by mz, (t, i) entry = P(x_{1:t}, Z_t = i)
  T = length(x)
  mz = para$mz
  A = para$A
  B = para$B
  w = para$w
  beta = matrix(1, T, mz)

  # The last row of beta is all 1.
  # Recursively compute the previous rows of beta
  for(t in (T-1):1){
    tmp = as.matrix(beta[t+1, ] * B[, x[t+1]])  # make tmp a column vector
    beta[t, ] = t(A %*% tmp)
    }
  return(beta)
}

In [404]:
myViterbi = function(x, para){
    # Output: most likely sequence of Z (T-by-1)
    T = length(x)
    mz = para$mz
    A = para$A
    B = para$B
    w = para$w
    log.A = log(A)
    log.w = log(w)
    log.B = log(B)

    # Compute delta (in log-scale)
    delta = matrix(0, T, mz) 
    # fill in the first row of delta
    delta[1, ] = log.w + log.B[, x[1]]

    #######################################
    ## YOUR CODE: 
    ## Recursively compute the remaining rows of delta
    for (t in 2:T){
        for (i in 1:mz){
            jVec = rep(0, mz)
            for (j in 1:mz){
                jVec[j] = delta[t-1,j] + log.A[j,i]
            }
            delta[t,i] = max(jVec) + log.B[i, x[t]]
        }
            
    }

    #######################################

    # Compute the most prob sequence Z
    Z = rep(0, T)
    # start with the last entry of Z
    Z[T] = which.max(delta[T, ])
    
    for (t in (T-1):1){
        Z[t] = which.max(delta[t,] + log.A[,Z[t+1]])
    }

    #######################################
    ## YOUR CODE: 
    ## Recursively compute the remaining entries of Z
    #######################################

    return(Z)
}

In [405]:
library(HMM)
hmm0 =initHMM(c("A", "B"), c(1, 2, 3),
              startProbs = ini.w,
              transProbs = ini.A, 
              emissionProbs = ini.B)
Rout = baumWelch(hmm0, data, maxIterations=100, delta=1E-9, pseudoCount=0)
Rout.Z = viterbi(Rout$hmm, data)

In [406]:
data = scan("coding4_part2_data.txt")

mz = 2
mx = 3
ini.w = rep(1, mz); ini.w = ini.w / sum(ini.w)
ini.A = matrix(1, 2, 2); ini.A = ini.A / rowSums(ini.A)
ini.B = matrix(1:6, 2, 3); ini.B = ini.B / rowSums(ini.B)
ini.para = list(mz = 2, mx = 3, w = ini.w,
                A = ini.A, B = ini.B)

myout = myBW(data, ini.para, n.iter = 100)

myout.Z = myViterbi(data, myout)
myout.Z[myout.Z==1] = 'A'
myout.Z[myout.Z==2] = 'B'

In [407]:
options(digits=8)
options()$digits

In [408]:
myout$A
Rout$hmm$transProbs

0,1
0.49793938,0.50206062
0.44883431,0.55116569


Unnamed: 0,A,B
A,0.49793938,0.50206062
B,0.44883431,0.55116569


In [409]:
myout$B
Rout$hmm$emissionProbs

0,1,2
0.22159897,0.20266127,0.57573976
0.34175148,0.17866665,0.47958186


Unnamed: 0,1,2,3
A,0.22159897,0.20266127,0.57573976
B,0.34175148,0.17866665,0.47958186


In [410]:
cbind(Rout.Z, myout.Z)[c(1:10, 180:200), ]
sum(Rout.Z != myout.Z)

Rout.Z,myout.Z
A,A
A,A
A,A
A,A
A,A
A,A
A,A
B,B
A,A
A,A
