In [1]:
import numpy as np

In [251]:
class HMM:
    """Discrete hidden Markov model that can sample observation sequences.

    The constructor stores its arguments as given; nothing is copied or
    validated.
    """

    def __init__(self,N,M,pi=None,A=None,B=None):
        # Number of possible hidden states.
        self.N = N
        # Number of possible observation symbols.
        self.M = M
        # Initial state probability vector.
        self.pi = pi
        # State transition probability matrix (row = current state).
        self.A = A
        # Observation (emission) probability matrix (row = state).
        self.B = B

    def rdistribution(self,dist):
        """Draw one index from the discrete distribution ``dist``.

        Parameters:
            dist: iterable of probabilities, one per index.

        Returns:
            The sampled index.

        Raises:
            ValueError: if ``dist`` has no entry with positive probability
                (this includes an empty ``dist``).
        """
        r = np.random.rand()
        last_positive = None
        for ix,p in enumerate(dist):
            if p > 0:
                last_positive = ix
            if r < p:
                return ix
            r -= p
        # Reaching this point means the draw was not absorbed by any entry:
        # either the probabilities sum to slightly less than 1, or repeated
        # subtraction lost a little precision. Returning None here (the old
        # behaviour) broke the caller's indexing, so attribute the draw to the
        # last outcome that can actually occur.
        if last_positive is None:
            raise ValueError('distribution has no entry with positive probability')
        return last_positive

    def generate(self,T):
        """Sample an observation sequence of length ``T``.

        Parameters:
            T: number of observations to generate.

        Returns:
            A list of ``T`` observation indices; an empty list when
            ``T <= 0``.
        """
        if T <= 0:
            return []
        # Draw the initial state, then emit the first observation from it.
        i = self.rdistribution(self.pi)
        observed_data = [self.rdistribution(self.B[i])]
        for _ in range(T-1):
            # Move to the next hidden state, then emit from that state.
            i = self.rdistribution(self.A[i])
            observed_data.append(self.rdistribution(self.B[i]))
        return observed_data
# Initial state distribution: the four hidden states are equally likely.
pi = np.array([0.25] * 4)

# Transition matrix: A[i, j] is the probability of moving from state i to state j.
A = np.array([
    [0.0, 1.0, 0.0, 0.0],
    [0.4, 0.6, 0.0, 0.0],
    [0.0, 0.4, 0.0, 0.6],
    [0.0, 0.0, 0.5, 0.5],
])

# Emission matrix: B[i, k] is the probability of observing symbol k in state i.
B = np.array([
    [0.5, 0.5],
    [0.6, 0.4],
    [0.2, 0.8],
    [0.3, 0.7],
])

# Number of hidden states (N) and of observation symbols (M).
N, M = B.shape

In [122]:
# Build the model from the notebook-level parameters and sample five observations.
hmm = HMM(N, M, pi=pi, A=A, B=B)
o = hmm.generate(T=5)

In [71]:
# Probability of an observation sequence, computed with the forward algorithm.
def prob_calc(o, trans=None, emit=None, init=None):
    """Return P(O | model) for the observation sequence ``o``.

    Parameters:
        o: non-empty sequence of observation indices.
        trans: state transition matrix; defaults to the notebook-level ``A``.
        emit: observation probability matrix (row = state, column = symbol);
            defaults to the notebook-level ``B``.
        init: initial state probability vector; defaults to the
            notebook-level ``pi``.

    Returns:
        The marginal probability of ``o`` (sum of the final forward
        variables over all states).

    Raises:
        ValueError: if ``o`` is empty.
    """
    # The globals are only read for arguments that were not supplied, so the
    # function also works with an explicitly passed model.
    trans = np.asarray(A if trans is None else trans, dtype=float)
    emit = np.asarray(B if emit is None else emit, dtype=float)
    init = np.asarray(pi if init is None else init, dtype=float)

    observations = list(o)
    if not observations:
        raise ValueError('observation sequence must not be empty')

    # Initialisation: alpha_1(i) = pi_i * b_i(o_1).
    alpha = init * emit[:, observations[0]]
    # Recursion: alpha_{t+1}(j) = (sum_i alpha_t(i) * a_ij) * b_j(o_{t+1}).
    # The state count comes from the arrays themselves, not a fixed 4.
    for symbol in observations[1:]:
        alpha = (alpha @ trans) * emit[:, symbol]
    # Termination: sum over the final state.
    return alpha.sum()
# To score a fixed sequence instead of the current one, set: o = [1, 0, 1, 0, 1]
p = prob_calc(o)
print(
    '给定观测序列:\n{}\n生成概率为：\n{}'.format(o, p)
)

给定观测序列:
[0, 0, 0, 1, 0]
生成概率为：
0.025782335249999996


In [252]:
### Sequence labelling and the Viterbi algorithm
def viterbi_decode1(O,A,B,pi):
    '''
    Find the most probable hidden-state path for an observation sequence.

    Input
    O: sequence of observation indices
    A: state transition matrix (row = previous state, column = next state)
    B: observation probability matrix (row = state, column = symbol)
    pi: initial state probability vector
    Output
    path: list of plain Python ints, one state index per observation;
          an empty list when O is empty

    Probabilities are multiplied directly (no log-space), so very long
    sequences can underflow to zero.
    '''
    T = len(O)
    if T == 0:
        # Nothing to decode; previously this raised IndexError on O[0].
        return []
    A = np.asarray(A)
    B = np.asarray(B)
    pi = np.asarray(pi)

    # Initialisation: delta_1(i) = pi_i * b_i(o_1).
    delta = pi * B[:,O[0]]
    # varphi[t, j] = best predecessor of state j at step t (row 0 is unused).
    varphi = np.zeros((T, A.shape[1]), dtype=int)
    for t in range(1,T):
        # tmp[i, j] = delta_{t-1}(i) * a_ij: score of reaching j via i.
        tmp = delta.reshape(-1,1) * A
        varphi[t,:] = np.argmax(tmp,axis=0)
        delta = np.max(tmp, axis=0) * B[:,O[t]]

    # Termination: best final state; int() keeps NumPy scalars out of the result.
    path = [0] * T
    path[-1] = int(np.argmax(delta))
    # Backtrack through the stored predecessors.
    for t in range(T-1,0,-1):
        path[t-1] = int(varphi[t,path[t]])
    return path
# Observation sequence to decode.
O = [1, 0, 1, 1, 0]

# Alternative three-state example, kept for reference (uncomment to try it):
# pi = np.array([0.2,0.2,0.4])
# A = np.array([
#     [0.5,0.2,0.3],
#     [0.3,0.5,0.2],
#     [0.2,0.3,0.5]])
# B = np.array([
#     [0.5,0.5],
#     [0.4,0.6],
#     [0.7,0.3]])
# O = [0,1,0]
# Or decode a freshly sampled sequence:
# O = hmm.generate(5)

path = viterbi_decode1(O=O, A=A, B=B, pi=pi)
print(
    '给定观测序列：{}\n最可能的隐藏序列：{}'.format(O, path)
)

给定观测序列：[1, 0, 1, 1, 0]
最可能的隐藏序列：[0, 1, 1, 0, 1]
