# 基于隐马尔可夫模型的笔势识别

3. 为每个元音训练单独的隐马尔可夫模型（每个元音单独聚类，然后分别学习 HMM，输入是与每个 2D 点关联的聚类类别号）。对于每个测试数据，针对每个 HMM 计算其对数似然 log P(O|M)，并选取对数似然最高的 HMM 所对应的类别作为该测试数据的分类结果。给出混淆矩阵并描述你的发现。

In [1]:
from data import train_set, val_set, gesture_list
from kmeans import clusters, K
import matplotlib.pyplot as plt

# colors = ['b', 'g', 'r', 'c', 'm']
# plt.figure(figsize=(4, 4))
# for k in range(5):
#     for point in clusters[k]:
#         plt.scatter(point[0], point[1], c=colors[k])
# plt.show()

ended at round 19


清洗数据

In [2]:
# 为训练集中每个点添加聚类类标号，得到观测o1, o2, ..., on
import copy
import math

# def get_cluster_num(point):
#     # 使用k=1的KNN得到某个点的聚类编号
#     x = point[0]
#     y = point[1]
#     for cluster_num in range(len(clusters)):
#         for cluster_point in clusters[cluster_num]:
#             if cluster_point[0] == x and cluster_point[1] == y:
#                 return cluster_num
#     return K

def get_cluster_num(point):
    """Return the index of the cluster holding the member nearest to ``point``.

    Performs a 1-nearest-neighbour lookup over every member point of every
    cluster in the module-level ``clusters``. Distances are Euclidean and
    use only the first two coordinates of each point. When several members
    are equally close, the one met first (lowest cluster index, then
    earliest position inside that cluster) wins.

    Args:
        point: Indexable whose items 0 and 1 are the x and y coordinates;
            any further items are ignored.

    Returns:
        The index into ``clusters`` of the cluster that contains the
        nearest member point.

    Raises:
        ValueError: If ``clusters`` contains no points at all.
    """
    px, py = point[0], point[1]
    # Lazily yield (distance, cluster index) pairs in cluster order so that
    # min() keeps the earliest pair on ties.
    candidates = (
        (math.sqrt((member[0] - px) ** 2 + (member[1] - py) ** 2), index)
        for index in range(len(clusters))
        for member in clusters[index]
    )
    _, nearest_cluster = min(candidates, key=lambda pair: pair[0])
    return nearest_cluster

# train_set_with_cluster_num = copy.deepcopy(train_set)
# for gesture_name in gesture_list:
#     for word_i in range(len(train_set[gesture_name])):
#         for point_i in range(len(train_set[gesture_name][word_i])):
#             # 找这个点的聚类编号
#             point = train_set[gesture_name][word_i][point_i]
#             clsuter_num = get_cluster_num(point)
#             train_set_with_cluster_num[gesture_name][word_i][point_i] = (point[0], point[1], point[2], clsuter_num)

In [3]:
# Training observation sequences grouped by vowel. Every recorded sample
# (`word`) of a gesture is turned into the list of cluster indices of its
# points, in the order the points appear.
train_obs = {vowel: [] for vowel in ('a', 'e', 'i', 'o', 'u')}

for gesture_name in gesture_list:
    for word in train_set[gesture_name]:
        # One discrete observation symbol per point of the sample.
        word_obs = [get_cluster_num(word_point) for word_point in word]
        train_obs[gesture_name].append(word_obs)

In [4]:
# 都取第10个word
# for gesture_name in gesture_list:
#     print(f'{gesture_name} 观测序列 {train_obs[gesture_name][9]}')

In [5]:
from hmmlearn import hmm
import numpy as np

# Trained HMMs, keyed by gesture name.
model = {}
# Number of hidden states in every HMM.
n_states = 3
# Number of distinct observation symbols (the K used by K-means).
n_observations = K

for gesture_name in gesture_list:
    gesture_hmm = hmm.CategoricalHMM(n_components=n_states, n_iter=100)
    model[gesture_name] = gesture_hmm
    # Fix the symbol count to K up front instead of letting fit() derive it
    # from the data -- presumably so every model shares one alphabet even
    # if some cluster never occurs for this gesture (TODO confirm).
    gesture_hmm.n_features = n_observations

    # fit() receives all sequences stacked into a single column vector,
    # together with the length of each individual sequence.
    observations = [
        np.array(sequence).reshape(-1, 1)
        for sequence in train_obs[gesture_name]
    ]
    lengths = [len(column) for column in observations]
    gesture_hmm.fit(np.concatenate(observations), lengths)

    # Uncomment to inspect the learned parameters:
    # print(lengths)
    # print(f'model of {gesture_name}:')
    # print("estimated pi:\n", gesture_hmm.startprob_)
    # print("estimated A:\n", gesture_hmm.transmat_)
    # print("estimated B:\n", gesture_hmm.emissionprob_)


推理：计算 log P(O|M)

In [6]:
# Validation observation sequences grouped by vowel, built the same way as
# the training ones: each sample (`word`) becomes the ordered list of
# cluster indices of its points.
val_obs = {vowel: [] for vowel in ('a', 'e', 'i', 'o', 'u')}

for gesture_name in gesture_list:
    for word in val_set[gesture_name]:
        # One discrete observation symbol per point of the sample.
        word_obs = [get_cluster_num(word_point) for word_point in word]
        val_obs[gesture_name].append(word_obs)

In [7]:
# Predicted vowel for every validation sample, grouped by the true vowel.
stat = {vowel: [] for vowel in ('a', 'e', 'i', 'o', 'u')}

for gesture_name in gesture_list:
    for obs in val_obs[gesture_name]:
        # score() expects the sequence as a single column.
        obs_seq = np.array(obs).reshape(-1, 1)

        # Log-likelihood of this sequence under each per-gesture model.
        log_probs = [
            (model[model_gesture].score(obs_seq), model_gesture)
            for model_gesture in model
        ]

        # Classify as the gesture whose model scores highest; on a tie the
        # model that comes first in `model` is kept.
        _, max_prob_model_gesture = max(log_probs, key=lambda pair: pair[0])
        stat[gesture_name].append(max_prob_model_gesture)
        # print(f'true {gesture_name}, predicted {max_prob_model_gesture}')

In [8]:
# Number of validation samples seen for each true vowel.
record_lengths = dict.fromkeys(('a', 'e', 'i', 'o', 'u'), 0)
# confusion_stat[true_vowel][predicted_vowel] -> raw count of samples.
confusion_stat = {}

for stat_key, predictions in stat.items():
    # Fresh tally of predicted labels for this true vowel.
    count = dict.fromkeys(('a', 'e', 'i', 'o', 'u'), 0)
    for record in predictions:
        count[record] += 1
        record_lengths[stat_key] += 1
    confusion_stat[stat_key] = count

In [9]:
import copy

# Row-normalised confusion matrix: confusion_matrix[true][inferred] is the
# fraction of validation samples of vowel `true` that were classified as
# `inferred`. A new nested dict is built, so `confusion_stat` keeps the raw
# counts untouched.
confusion_matrix = {}
for vowel_true, counts in confusion_stat.items():
    total = record_lengths[vowel_true]
    confusion_matrix[vowel_true] = {
        # A vowel without any validation sample has nothing to normalise;
        # keep its row at 0.0 instead of raising ZeroDivisionError.
        vowel_infer: (hits / total if total else 0.0)
        for vowel_infer, hits in counts.items()
    }

# Bare expression so the notebook displays the matrix as the cell output.
confusion_matrix

{'a': {'a': 0.95, 'e': 0.0, 'i': 0.0, 'o': 0.05, 'u': 0.0},
 'e': {'a': 0.0, 'e': 0.95, 'i': 0.0, 'o': 0.05, 'u': 0.0},
 'i': {'a': 0.0, 'e': 0.0, 'i': 0.95, 'o': 0.0, 'u': 0.05},
 'o': {'a': 0.0, 'e': 0.0, 'i': 0.0, 'o': 1.0, 'u': 0.0},
 'u': {'a': 0.0, 'e': 0.0, 'i': 0.0, 'o': 0.0, 'u': 1.0}}