In [1]:
import numpy as np
import pandas as pd

In [2]:
# initialization
A = np.array([[0.7, 0.3], [0.4, 0.6]])
B = np.array([[0.1, 0.4, 0.5], [0.6, 0.3, 0.1]])
PI = np.array([0.6, 0.4])
T = 6 # length of observation sequence
N = 2 # number of states in model
M = 3 # number of observations model
V = {1, 2, 3} # set of possible observations
O = np.array([2, 2, 3, 1, 3, 2])

In [3]:
def calcAlpha(A, B, O, PI, T, N):
    # initialization
    alpha = np.zeros((T, N))
    for i in range(N):
        alpha[0, i] = PI[i]*B[i, O[0]-1]
        
    # induction
    for t in range(T-1):
        for j in range(N):
            u = 0
            for i in range(N):
                u = u + alpha[t,i]*A[i,j]
            alpha[t+1, j] = u*B[j, O[t+1]-1]
    return alpha

In [4]:
def calcP(alpha, T, N):
    p = 0
    for i in range(N):
        p = p + alpha[T-1, i]
    return p

In [5]:
def calcBeta(A, B, O, N, T):
    # initialization
    beta = np.zeros((T, N))
    for i in range(N):
        beta[T-1, i] = 1
    
    # induction
    for t in range(T-2, -1, -1):
        for i in range(N):
            for j in range(N):
                beta[t, i] = beta[t,i]+A[i,j]*B[j, O[t+1]-1]*beta[t+1,j]
    return beta

In [6]:
def calcGamma(alpha, beta, p, T, N):
    gamma = np.zeros((T, N))
    for t in range(T):
        for i in range(N):
            gamma[t, i] = alpha[t, i]*beta[t, i]/p
    return gamma

In [7]:
def calcDiGamma(alpha, beta, A, B, O, p, N, T):
    di_gamma = np.zeros((T-1, N, N))
    for t in range(T-1):
        for i in range(N):
            for j in range(N):
                di_gamma[t,i,j] = (alpha[t,i]*A[i,j]*B[j,O[t+1]-1]*beta[t+1,j])/p
    return di_gamma

In [8]:
def reEstimate(gamma, di_gamma, T, N, M, O):
    # re-estimate PI
    PI1 = np.zeros((N))
    for i in range(N):
        PI1[i] = gamma[0,i]

    # re-estimate A
    a = np.zeros((N, N))
    for i in range(N):
        for j in range(N):
            num = 0
            den = 0
            for t in range(T-1):
                num = num + di_gamma[t,i,j]
                den = den + gamma[t,i]
            a[i,j] = num/den
        
    # re-estimate B
    b = np.zeros((N, M))
    for j in range(N):
        for k in range(M):
            num = 0
            den = 0
            for t in range(T-1):
                if O[t]-1 == k:
                    num = num + gamma[t,j]
                den = den + gamma[t,j]
            b[j,k] = num/den
            
    return a, b, PI1

In [9]:
def prob3(A, B, PI, O, T, N, M):
    p = 0
    p_temp = 1
    while p < p_temp:
        alpha = calcAlpha(A, B, O, PI, T, N)
        p = calcP(alpha, T, N)
        beta = calcBeta(A, B, O, N, T)
        gamma = calcGamma(alpha, beta, p, T, N)
        di_gamma = calcDiGamma(alpha, beta, A, B, O, p, N, T)
        a, b, PI1 = reEstimate(gamma, di_gamma, T, N, M, O)
#         print('a:', a,'\n\n', 'b:', b, '\n\n', 'pi:', PI1, '\n\n')
        p_temp = calcP(calcAlpha(a, b, O, PI1, T, N), T, N)
        A = a
        B = b
        PI = PI1

    return A, B, PI, p_temp

In [10]:
A, B, PI, p_temp = prob3(A, B, PI, O, T, N, M)

In [11]:
print(A)

[[5.05930855e-17 1.00000000e+00]
 [1.00000000e+00 0.00000000e+00]]


In [12]:
print(B)

[[0.         0.33333333 0.66666667]
 [0.5        0.5        0.        ]]


In [13]:
print(PI)

[1.00000000e+000 6.07653686e-199]


In [14]:
print(p_temp)

0.01851851851851851
