<a href="https://colab.research.google.com/github/vlozg/speech_hmm/blob/main/Test_DiagHMM_016.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LƯU Ý: NOTEBOOK NÀY CHỈ DÙNG ĐỂ SHOW KẾT QUẢ CHẠY, KHÔNG NÊN CHẠY LẠI NOTEBOOK NÀY 
(VÌ TRONG NÀY CÓ CODE LƯU LẠI PRETRAINED MODEL LÊN DRIVE SẼ BỊ XÓA)

# Speech to text with HMM

- **Bài toán**: Chuyển giọng nói thành văn bản
    - **Input**: Đoạn ghi âm chứa nội dung là các số từ 0 đến 9
    - **Output**: Phân lớp của đoạn ghi âm

# Các biến thiết lập cho thử nghiệm

In [None]:
# Experiment configuration read by the feature-extraction, training and saving cells below
n_mfcc_ceptrum = 12  # number of MFCC coefficients kept per frame
n_delta_features = 1  # highest delta order appended to the MFCCs (orders 1 and 2 are handled)
n_mixtures = 3  # Gaussian mixture components per HMM state
fsdd_split = 0.3  # fraction of FSDD files copied into the training folder
wolfram_split = 0.3  # fraction of Wolfram files copied into the training folder
experiment_id = '016'  # tag embedded in the saved model file names

In [None]:
# Feature dimension per frame: the base MFCCs plus one equally sized block per delta order
n_mfcc_features = n_mfcc_ceptrum * (1+n_delta_features)
n_mfcc_features

24

# Import và cài đặt thư viện

In [None]:
# Install the extra libraries. Note: the runtime must be restarted after installing.
!pip install pydub
!pip install pomegranate



In [None]:
# Xác thực google để upload/download qua google drive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Quản lý file, folder
import os
from shutil import copyfile, rmtree
import random

# Xử lý audio
import librosa
import librosa.display
from scipy.io import wavfile

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import pomegranate # Thư viện cho mô hình xác suất
from pomegranate import *

# Tải dữ liệu và lấy xác thực Google

Dữ liệu dùng để huấn luyện và đánh giá, còn xác thực google thì dùng để upload/download mô hình trên drive.

In [None]:
%%capture
# Download and unpack the Wolfram archive (skipped when the zip is already present)
if not os.path.isfile('./dataset_1_wolfram.zip'):
  !gdown --id 115tIAitBNeJC0DwrP-ZyJ6RS3TyWN0qD
  !unzip -o dataset_1_wolfram.zip

# Download and unpack the FSDD archive (skipped when the zip is already present)
if not os.path.isfile('./dataset_2_FSDD.zip'):
  !gdown --id 1Ua9zlPBc0Fv4xGHSQTb7eIvUh_dqFI6P
  !unzip -o dataset_2_FSDD.zip

# Download the self-recorded audio (no existence check, so this runs every time)
!gdown --id 1lH_k1AYMVlJvodtZdD7OK2zkdPXxlW9i

In [None]:
# Obtain Google credentials so files can be uploaded to / downloaded from Drive
auth.authenticate_user()
gauth =  GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)  # Drive client used by the model-saving cells

# Hàm xử lý âm thanh

In [None]:
def minmax_scale(wave):
  """Rescale `wave` linearly so its minimum maps to -0.5 and its maximum to 0.5."""
  lowest = wave.min()
  span = wave.max() - lowest
  return (wave - lowest) / span - 0.5

def standard_scale(wave):
  """Shift `wave` to zero mean and divide it by its standard deviation."""
  centered = wave - wave.mean()
  return centered / wave.std()

def scaleAddNoise(wave):
  """Standardise `wave` and add Gaussian noise scaled by 0.2 to every sample."""
  scaled = standard_scale(wave)
  noise = 0.2*np.random.normal(size=wave.shape)
  return scaled + noise

def read_audio(full_audio_path):
  """Read an audio file as a mono floating-point waveform sampled at 16 kHz.

  Files already stored at 16 kHz are read with scipy and normalised according
  to their sample dtype. Any other file is loaded and resampled by librosa,
  which is slow (the original author measured scipy as 5-10x faster), so it
  is only used as the fallback.

  Return:
    wave(np.array): waveform
    sample_rate(int): always 16000
  """
  sample_rate, wave = wavfile.read(full_audio_path)
  if sample_rate != 16000:
    wave, sample_rate = librosa.load(full_audio_path, sr=16000)
    return wave, sample_rate

  # Convert PCM integers to floats in [-1, 1); float WAVs are already in that range.
  sample_kind = wave.dtype.kind
  if sample_kind == 'i':
    # Signed PCM: int16 -> divide by 32768, int32 -> divide by 2**31
    wave = wave / float(2 ** (8 * wave.dtype.itemsize - 1))
  elif sample_kind == 'u':
    # Unsigned PCM (8-bit WAV) is centred on the middle of its range
    half_range = float(2 ** (8 * wave.dtype.itemsize - 1))
    wave = (wave - half_range) / half_range

  # scipy returns (samples, channels) for multi-channel files; average the
  # channels so this path yields a mono signal like the librosa path does
  if wave.ndim > 1:
    wave = wave.mean(axis=1)
  return wave, sample_rate

def read_process_audio(full_audio_path):
  """Read an audio file, pad 2000 zero samples on both ends, then scale it and add noise.

  Return:
    wave(np.array): padded waveform after scaleAddNoise
    sample_rate(int): sample rate reported by read_audio
  """
  raw_wave, sample_rate = read_audio(full_audio_path)
  padded = np.pad(raw_wave, 2000, mode='constant', constant_values=0.0)
  return scaleAddNoise(padded), sample_rate

def unvoiced_frame(wave, sample_rate, min_len = 10):
  """Build a boolean mask selecting the higher-energy frames of `wave`.

  The per-frame RMS energy is standardised and squashed with a logistic
  function; frames whose score exceeds a threshold are selected. The
  threshold starts at 0.4 and is lowered in steps of 0.05 until at least
  `min_len` frames are selected, so enough frames remain to train an HMM.

  Args:
    wave(np.array): waveform passed to librosa.feature.rms
    sample_rate(int): unused, kept for interface compatibility
    min_len(int): minimum number of frames the mask should select

  Return:
    np.array of bool with one entry per RMS frame. When the clip has fewer
    than `min_len` frames (or the scores are not finite) every frame is
    selected instead of looping forever.
  """
  rms = librosa.feature.rms(y=wave)[0]
  r_normalized = standard_scale(rms)
  p = np.exp(r_normalized) / (1 + np.exp(r_normalized))
  thresh = 0.4
  slice_ = p > thresh
  # Logistic scores are positive, so once the threshold reaches 0 lowering it
  # further cannot select more frames; stop there instead of spinning.
  while slice_.sum() < min_len and thresh > 0:
    thresh -= 0.05
    slice_ = p > thresh
  if slice_.sum() < min_len:
    slice_ = np.ones(p.shape, dtype=bool)
  return slice_

def extract_mfcc(wave, sample_rate, trim=True):
  """Compute scaled MFCC features for a waveform.

  A 40-band mel spectrogram is computed, optionally restricted to the frames
  selected by unvoiced_frame, converted to dB and turned into
  n_mfcc_ceptrum+2 cepstral coefficients (lifter=40). The first two
  coefficients are dropped, and the result is transposed and divided by 800.
  """
  mel_power = librosa.feature.melspectrogram(y=wave, sr=sample_rate, n_mels=40)
  if trim:
    keep = unvoiced_frame(wave, sample_rate)
    mel_power = mel_power[:, keep]
  log_mel = librosa.power_to_db(mel_power)
  cepstra = librosa.feature.mfcc(S=log_mel, n_mfcc=n_mfcc_ceptrum+2, lifter=40)
  return cepstra[2:, :].T / 800

def mfcc_delta_features(mfcc, order):
  """Return the `order`-th delta of `mfcc`; order 0 returns the input object itself.

  Non-zero orders are delegated to librosa.feature.delta with its default
  settings apart from `order`.
  """
  return mfcc if order==0 else librosa.feature.delta(mfcc, order=order)

def full_mfcc_from_file(full_audio_path, trim=True):
  """Load an audio file and return its MFCCs with delta features appended column-wise.

  Args:
    full_audio_path: path of the audio file to read
    trim(bool): forwarded to extract_mfcc

  Return:
    np.array with one row per frame: the MFCCs followed by one block per
    delta order from 1 up to n_delta_features (at most 2).
  """
  wave, sr = read_process_audio(full_audio_path)
  mfccs = extract_mfcc(wave, sr, trim)  # one row per frame, one column per coefficient
  blocks = [mfccs]
  for order in (1, 2):
    if n_delta_features >= order:
      # librosa.feature.delta differentiates along the last axis by default.
      # Put time on that axis and transpose back, so the deltas describe
      # change over time rather than change across cepstral coefficients.
      blocks.append(mfcc_delta_features(mfccs.T, order).T)
  return np.hstack(blocks)

# Hàm bổ trợ

In [None]:
def buildDataSet(dir, trim=True):
    """Extract features for every .wav file in `dir`, grouped by label.

    The label is the second '_'-separated field of the file name without its
    extension (e.g. '12_7.wav' -> '7').

    Args:
        dir: folder containing the .wav files (a trailing separator is optional)
        trim(bool): forwarded to full_mfcc_from_file

    Return:
        dict mapping each label to the list of feature arrays of its files.
    """
    # Sort so the order of the sequences does not depend on the file system
    wav_names = sorted(f for f in os.listdir(dir) if os.path.splitext(f)[1] == '.wav')
    dataset = {}
    for fileName in wav_names:
        label = fileName.split('.')[0].split('_')[1]
        feature = full_mfcc_from_file(os.path.join(dir, fileName), trim)
        dataset.setdefault(label, []).append(feature)
    return dataset

def makeTrainTestDir(mainFolder, filenames, rate):
    """Recreate `mainFolder` and copy the files into train/test sub-folders.

    Args:
        mainFolder: output folder; it is deleted and recreated if it exists
        filenames: sequence of (source_path, target_file_name) entries
        rate: fraction of the shuffled entries copied to 'train_audio';
            the rest goes to 'test_audio'

    The shuffle uses random.seed(1) on a copy of `filenames`, so the caller's
    list is left untouched and repeated calls with the same list produce the
    same split.
    """
    paths = [f'{mainFolder}',
             f'{mainFolder}/train_audio',
             f'{mainFolder}/test_audio']

    for path in paths:
        # Start from an empty folder; only an existing directory is removed
        if os.path.isdir(path):
            rmtree(path)
        os.makedirs(path)

    shuffled = list(filenames)
    random.seed(1)
    random.shuffle(shuffled)
    splitPoint = int(len(shuffled)*rate)

    for entry in shuffled[:splitPoint]:
        copyfile(entry[0], f"{paths[1]}/{entry[1]}")

    for entry in shuffled[splitPoint:]:
        copyfile(entry[0], f"{paths[2]}/{entry[1]}")

# Setup thư mục chứa data

In [None]:
def formatFilenameFSDD(dir):
    """List the files of an FSDD folder together with their normalised target names.

    Each entry is (source_path, '<count>_<label>.wav'), where <label> is the
    part of the original file name before the first '_' and <count> is a
    running index. The listing is sorted so the indices, and therefore any
    seeded shuffle applied afterwards, do not depend on file-system order.
    """
    return [(f"{dir}/{filename}", f"{count}_{filename.split('_')[0]}.wav")
            for count, filename in enumerate(sorted(os.listdir(dir)))]

# Split FSDD into ./fsdd/train_audio and ./fsdd/test_audio; fsdd_split is the training fraction
filenames = formatFilenameFSDD('./dataset_2_FSDD')
makeTrainTestDir('fsdd', filenames, fsdd_split)

In [None]:
def formatFilenameWolfram(dir):
    """List the files of the Wolfram dataset together with their normalised target names.

    `dir` is expected to contain one sub-folder per class. Each entry is
    (source_path, '<count>_<label>.wav'), where <label> is the part of the
    sub-folder name before the first '_' and <count> is a running index over
    all files. Folders and files are visited in sorted order so the indices,
    and therefore any seeded shuffle applied afterwards, do not depend on
    file-system order.
    """
    filenames = []
    for folder in sorted(os.listdir(dir)):
        label = folder.split('_')[0]
        for filename in sorted(os.listdir(f'{dir}/{folder}')):
            # len(filenames) is the running index of the entry being added
            filenames.append((f"{dir}/{folder}/{filename}", f"{len(filenames)}_{label}.wav"))

    return filenames

# Split Wolfram into ./wolfram/train_audio and ./wolfram/test_audio; wolfram_split is the training fraction
filenames = formatFilenameWolfram('./dataset_1_wolfram')
makeTrainTestDir('wolfram', filenames, wolfram_split)

# Huấn luyện mô hình (có thể bỏ qua vì mô hình đã save trên drive)

## Hàm train mô hình

In [None]:
def Generate_DiagGMM(full_fset, n_features, n_states=5, n_cmps=3):
  """Create one randomly initialised emission distribution per HMM state.

  Every Gaussian is an IndependentComponentsDistribution of `n_features`
  NormalDistribution objects whose two parameters are drawn from
  np.random.random. With n_cmps > 1 each state gets a GeneralMixtureModel of
  `n_cmps` such Gaussians, otherwise a single one. `full_fset` is not used.

  Return:
    list with `n_states` distributions.
  """
  def random_diag_gaussian():
    # One independent normal per feature dimension
    return IndependentComponentsDistribution(tuple(
        NormalDistribution(*np.random.random(2))
        for _ in range(n_features)
        ))

  dists = []
  for _ in range(n_states):
    if n_cmps > 1:
      emission = GeneralMixtureModel([random_diag_gaussian() for _ in range(n_cmps)])
    else:
      emission = random_diag_gaussian()
    dists.append(emission)

  return dists

In [None]:
# Generate progressive HMM model
def left_right_GMMHMM(seed_sample, x_dim, n_states=10, n_modals=9, diag=True, random=0):
  """Build a baked left-to-right HMM with mixture emissions.

  Args:
    seed_sample: forwarded to Generate_DiagGMM when `diag` is true
    x_dim(int): feature dimension of the observations
    n_states(int): number of hidden states, named H0..H{n_states-1}
    n_modals(int): mixture components per state
    diag(bool): True builds the emissions with Generate_DiagGMM; False uses
      mixtures of blank MultivariateGaussianDistribution objects
    random: accepted for interface compatibility; it has no effect on the
      model that is built

  Topology: start -> H0 with probability 1; every state loops on itself with
  probability 0.5 and moves to the next state (the end state for the last
  one) with probability 0.5.
  """
  model = HiddenMarkovModel()
  if diag:
    emissions = Generate_DiagGMM(seed_sample, x_dim, n_states, n_modals)
  else:
    emissions = [GeneralMixtureModel([MultivariateGaussianDistribution.blank(x_dim) for _ in range(n_modals)])
                 for _ in range(n_states)]
  states = [State(emission, name=f"H{i}") for i, emission in enumerate(emissions)]
  model.add_states(states)
  model.add_transition(model.start, states[0], 1)
  for i, state in enumerate(states):
    # The last state hands over to the model's end state
    successor = states[i+1] if i+1 < n_states else model.end
    model.add_transition(state, state, 0.5)
    model.add_transition(state, successor, 0.5)
  model.bake()
  return model

In [None]:
tol = 1000  # maximum number of random restarts per label
def train_GMMHMM(dataset, input_dim, n_hidden_state, n_gauss_modal, diag=True, failed_label_return=False, report_fail=None):
    """Train one left-to-right GMM-HMM per label, restarting when training diverges.

    For each label a freshly initialised model is fitted; if the last recorded
    improvement is NaN the attempt is discarded and a new model is tried, up
    to `tol` times.

    Args:
        dataset: dict mapping label -> list of observation sequences
        input_dim(int): feature dimension of the observations
        n_hidden_state(int): number of hidden states per model
        n_gauss_modal(int): mixture components per state
        diag(bool): forwarded to left_right_GMMHMM
        failed_label_return(bool): when True, labels whose attempts all failed
            are appended to `report_fail`
        report_fail(list): list receiving the failed labels; ignored when None

    Return:
        dict mapping label -> trained model, or None when every attempt failed.
    """
    GMMHMM_Models = dict()

    for label in dataset.keys():
        print(f"Training model detect {label}")
        model = None
        for attempt in range(tol):
          candidate = left_right_GMMHMM(dataset[label][0], input_dim, n_hidden_state, n_gauss_modal, diag=diag)
          _, imprv = candidate.fit(dataset[label], verbose=True, multiple_check_input=False,return_history=True)  # get optimal parameters
          if not np.isnan(imprv.improvements[-1]):
            model = candidate
            break
        # report_fail defaults to None, so only append when a list was supplied
        if model is None and failed_label_return and report_fail is not None:
          report_fail.append(label)
        GMMHMM_Models[label] = model

    return GMMHMM_Models

In [None]:
def retrain_specific_class(hmmModels, label, dataset, input_dim, n_hidden_state, n_gauss_modal, diag=True):
  """Retrain the model of a single label and store it in `hmmModels`.

  A freshly initialised model is fitted up to `tol` times until the last
  recorded improvement is not NaN. If every attempt fails, None is stored
  for the label instead of a diverged model (train_GMMHMM does the same).

  Return:
    the same `hmmModels` dict, updated in place.
  """
  model = None
  for attempt in range(tol):
    candidate = left_right_GMMHMM(dataset[label][0], input_dim, n_hidden_state, n_gauss_modal, diag=diag)
    _, imprv = candidate.fit(dataset[label], verbose=True, multiple_check_input=False,return_history=True)  # get optimal parameters
    if not np.isnan(imprv.improvements[-1]):
      model = candidate
      break
  hmmModels[label] = model
  return hmmModels

## Hàm test mô hình

In [None]:
# test model
def evaluateModel(testDataset, model):
  """Print per-label and overall recognition rates of the per-label models.

  Args:
    testDataset: dict mapping label -> list of observation sequences
    model: dict mapping label -> trained model; the models are handed to a
      BayesClassifier in sorted label order

  Nothing is returned; an empty `testDataset` prints nothing.
  """
  if (len(testDataset) == 0):
    return
  # The classifier predicts an index into this sorted list, so the expected
  # index is looked up here rather than assumed to equal int(label)
  model_labels = sorted(model)
  digit_clf = BayesClassifier([model[name] for name in model_labels])
  true_cnt = 0
  total = 0
  for label in sorted(testDataset.keys()):
      features = np.array(testDataset[label], dtype='object')
      iter_total = len(features)
      if iter_total == 0:
          # Nothing to classify for this label; also avoids dividing by zero
          continue
      pred = digit_clf.predict(features)
      expected_idx = model_labels.index(label) if label in model_labels else -1
      iter_cnt = (pred == expected_idx).sum()
      total += iter_total
      true_cnt += iter_cnt
      print(f"{label}: {iter_cnt}/{iter_total} ({iter_cnt/iter_total})")
  if total == 0:
    return
  print("Final recognition rate is %.2f"%(100.0*true_cnt/total), "%")

## Đọc và tiền xử lý data

In [None]:
# Extract features for the FSDD training files (folder created by makeTrainTestDir)
master_path = 'fsdd'

trainDir = master_path + '/train_audio/'
trainDataSet_fsdd = buildDataSet(trainDir)
print("Finish prepare the training data")

# Extract features for the FSDD test files
testDir = master_path + '/test_audio/'
testDataSet_fsdd = buildDataSet(testDir)
print("Finish prepare the test data")

Finish prepare the training data
Finish prepare the test data


In [None]:
# Extract features for the Wolfram training files (folder created by makeTrainTestDir)
master_path = 'wolfram'

trainDir = master_path + '/train_audio/'
trainDataSet_wolfram = buildDataSet(trainDir)
print("Finish prepare the training data")

# Extract features for the Wolfram test files
testDir = master_path + '/test_audio/'
testDataSet_wolfram = buildDataSet(testDir)
print("Finish prepare the test data")

Finish prepare the training data
Finish prepare the test data


In [None]:
# Combine the FSDD and Wolfram training features in a NEW dict. Assigning
# trainDataSet_fsdd directly would alias it, so the FSDD dict would be
# modified and re-running this cell would append the Wolfram data again.
trainDataSet = {label: list(features) for label, features in trainDataSet_fsdd.items()}
if wolfram_split > 0:
  for label in trainDataSet:
    trainDataSet[label] = trainDataSet[label] + trainDataSet_wolfram[label]

## **Mô hình 1**
- 10 hidden states
- Multivariate Diagonal Gauss cho emission probs

In [None]:
# Sanity check: print the shortest sequence length per label; it must not be smaller than the number of states
for label in trainDataSet.keys():
  print(min(map(len, trainDataSet[label])))

10
10
10
10
10
10
10
10
10
10


In [None]:
%%time
# train
failed_label = []
hmmModels = train_GMMHMM(trainDataSet, n_mfcc_features,10,n_mixtures, failed_label_return=True, report_fail=failed_label)
print("Finish training of the GMM_HMM models for digits 0-9")
print(failed_label)
assert len(failed_label) != 0

Training model detect 1
[1] Improvement: 1900732.594625065	Time (s): 0.6986
[2] Improvement: nan	Time (s): 0.7044
Total Training Improvement: nan
Total Training Time (s): 2.1204
[1] Improvement: 1596524.6029461203	Time (s): 0.6739
[2] Improvement: nan	Time (s): 0.6878
Total Training Improvement: nan
Total Training Time (s): 2.0569
[1] Improvement: 1823080.4574629788	Time (s): 0.6893
[2] Improvement: 18815.808928212617	Time (s): 0.6917
[3] Improvement: 4610.3697009531315	Time (s): 0.6907
[4] Improvement: 2092.3080024482915	Time (s): 0.6798
[5] Improvement: 1675.3370093686972	Time (s): 0.6665
[6] Improvement: 2525.810554763535	Time (s): 0.6823
[7] Improvement: 2752.6452335525537	Time (s): 0.6803
[8] Improvement: 1982.7531066265656	Time (s): 0.692
[9] Improvement: 3611.5396225923905	Time (s): 0.6708
[10] Improvement: 1883.2910967267817	Time (s): 0.6748
[11] Improvement: 764.1433068017941	Time (s): 0.6867
[12] Improvement: 361.1839180038078	Time (s): 0.6733
[13] Improvement: 136.5342256440

In [None]:
# Retrain only the model of label '5' (hard-coded; presumably the label that needed retraining in the recorded run - confirm)
hmmModels = retrain_specific_class(hmmModels, '5', trainDataSet, n_mfcc_features,10,n_mixtures)

[1] Improvement: 1256661.1558708488	Time (s): 0.6745
[2] Improvement: nan	Time (s): 0.6872
Total Training Improvement: nan
Total Training Time (s): 2.0266
[1] Improvement: 1682581.9681572467	Time (s): 0.6723
[2] Improvement: 18636.74136057147	Time (s): 0.6625
[3] Improvement: 5496.081888972665	Time (s): 0.6512
[4] Improvement: 4011.4449186443817	Time (s): 0.6664
[5] Improvement: 2413.141591915628	Time (s): 0.6666
[6] Improvement: 1828.5377103947103	Time (s): 0.6671
[7] Improvement: 1364.4620807199972	Time (s): 0.6695
[8] Improvement: 1067.4356425398728	Time (s): 0.6937
[9] Improvement: 719.3100105014164	Time (s): 0.6963
[10] Improvement: 603.2448345400626	Time (s): 0.6731
[11] Improvement: 357.2575503643602	Time (s): 0.6681
[12] Improvement: 288.29570898308884	Time (s): 0.6498
[13] Improvement: 218.25026235345285	Time (s): 0.6425
[14] Improvement: 220.1224326157244	Time (s): 0.6488
[15] Improvement: 201.5907062414335	Time (s): 0.6444
[16] Improvement: 201.12827272212598	Time (s): 0.669

In [None]:
# Report per-label and overall recognition rates on the training data and on both test sets
print("Evaluate on train set")
evaluateModel(trainDataSet, hmmModels)
print("Evaluate on rest of FSDD set")
evaluateModel(testDataSet_fsdd, hmmModels)
print("Evaluate on wolfram test set")
evaluateModel(testDataSet_wolfram, hmmModels)

Evaluate on train set
0: 642/795 (0.8075471698113208)
1: 624/837 (0.7455197132616488)
2: 618/745 (0.8295302013422818)
3: 474/793 (0.5977301387137453)
4: 676/803 (0.8418430884184309)
5: 678/812 (0.8349753694581281)
6: 591/846 (0.6985815602836879)
7: 592/789 (0.7503168567807351)
8: 556/795 (0.6993710691823899)
9: 618/784 (0.7882653061224489)
Final recognition rate is 75.87 %
Evaluate on rest of FSDD set
0: 169/211 (0.8009478672985783)
1: 118/208 (0.5673076923076923)
2: 202/214 (0.9439252336448598)
3: 103/218 (0.4724770642201835)
4: 202/214 (0.9439252336448598)
5: 198/203 (0.9753694581280788)
6: 87/199 (0.4371859296482412)
7: 185/234 (0.7905982905982906)
8: 131/197 (0.6649746192893401)
9: 139/202 (0.6881188118811881)
Final recognition rate is 73.05 %
Evaluate on wolfram test set
0: 1346/1670 (0.8059880239520958)
1: 1186/1625 (0.7298461538461538)
2: 1382/1714 (0.8063010501750292)
3: 933/1645 (0.5671732522796352)
4: 1321/1655 (0.7981873111782477)
5: 1346/1642 (0.8197320341047503)
6: 1217/16

In [None]:
# Save the models: upload each label's HMM as JSON into the Drive folder given by the hard-coded parent id.
# The "_10_" in the title presumably records the number of hidden states of this experiment - confirm.
for model_label in hmmModels.keys():
  file = drive.CreateFile({'title': f'hmm[{model_label}]_{n_mfcc_features}_10_{n_mixtures}[{experiment_id}].json', 'parents': [{'id': '1QPUr4vwYHu3n9iH3iQmnvDUt2Dgx4V3Y'}]})
  file.SetContentString(hmmModels[model_label].to_json())
  file.Upload()

## **Mô hình 2**
- 5 hidden states
- Multivariate Diagonal Gauss cho emission probs

In [None]:
%%time
# train
failed_label = []
hmmModels = train_GMMHMM(trainDataSet, n_mfcc_features,5,n_mixtures, failed_label_return=True, report_fail=failed_label)
print("Finish training of the GMM_HMM models for digits 0-9")
print(failed_label)
assert len(failed_label) != 0

Training model detect 1
[1] Improvement: 1115168.0888382865	Time (s): 0.3857
[2] Improvement: 18176.900330435834	Time (s): 0.3768
[3] Improvement: 8911.455749416025	Time (s): 0.3749
[4] Improvement: 7278.5240153273335	Time (s): 0.3649
[5] Improvement: 5552.337391261128	Time (s): 0.383
[6] Improvement: 3712.5366947660223	Time (s): 0.3814
[7] Improvement: 3046.721109391772	Time (s): 0.3684
[8] Improvement: 1306.0356797856512	Time (s): 0.3898
[9] Improvement: 695.5998976215487	Time (s): 0.3708
[10] Improvement: 412.42012094706297	Time (s): 0.3676
[11] Improvement: 288.0230992799625	Time (s): 0.3794
[12] Improvement: 206.22501459508203	Time (s): 0.3733
[13] Improvement: 121.62166118156165	Time (s): 0.3823
[14] Improvement: 105.88605856720824	Time (s): 0.3751
[15] Improvement: 103.57819355838001	Time (s): 0.3773
[16] Improvement: 105.41273196320981	Time (s): 0.3824
[17] Improvement: 84.50999763479922	Time (s): 0.3703
[18] Improvement: 65.59726960863918	Time (s): 0.3661
[19] Improvement: 46.

In [None]:
# Report per-label and overall recognition rates on the training data and on both test sets
print("Evaluate on train set")
evaluateModel(trainDataSet, hmmModels)
print("Evaluate on rest of FSDD set")
evaluateModel(testDataSet_fsdd, hmmModels)
print("Evaluate on wolfram test set")
evaluateModel(testDataSet_wolfram, hmmModels)

Evaluate on train set
0: 664/795 (0.8352201257861636)
1: 523/837 (0.6248506571087217)
2: 323/745 (0.43355704697986575)
3: 610/793 (0.7692307692307693)
4: 634/803 (0.7895392278953923)
5: 693/812 (0.853448275862069)
6: 611/846 (0.7222222222222222)
7: 581/789 (0.7363751584283904)
8: 408/795 (0.5132075471698113)
9: 529/784 (0.6747448979591837)
Final recognition rate is 69.71 %
Evaluate on rest of FSDD set
0: 188/211 (0.8909952606635071)
1: 94/208 (0.4519230769230769)
2: 66/214 (0.308411214953271)
3: 187/218 (0.8577981651376146)
4: 205/214 (0.9579439252336449)
5: 200/203 (0.9852216748768473)
6: 87/199 (0.4371859296482412)
7: 172/234 (0.7350427350427351)
8: 108/197 (0.5482233502538071)
9: 114/202 (0.5643564356435643)
Final recognition rate is 67.67 %
Evaluate on wolfram test set
0: 1373/1670 (0.8221556886227545)
1: 1017/1625 (0.6258461538461538)
2: 757/1714 (0.44165694282380397)
3: 1214/1645 (0.7379939209726444)
4: 1203/1655 (0.7268882175226586)
5: 1352/1642 (0.8233861144945189)
6: 1269/1624

In [None]:
# Save the models: upload each label's HMM as JSON into the Drive folder given by the hard-coded parent id.
# The "_5_" in the title presumably records the number of hidden states of this experiment - confirm.
for model_label in hmmModels.keys():
  file = drive.CreateFile({'title': f'hmm[{model_label}]_{n_mfcc_features}_5_{n_mixtures}[{experiment_id}].json', 'parents': [{'id': '1QPUr4vwYHu3n9iH3iQmnvDUt2Dgx4V3Y'}]})
  file.SetContentString(hmmModels[model_label].to_json())
  file.Upload()

## **Mô hình 3**
- 3 hidden states
- Multivariate Diagonal Gauss cho emission probs

In [None]:
%%time
# train
failed_label = []
hmmModels = train_GMMHMM(trainDataSet, n_mfcc_features,3,n_mixtures, failed_label_return=True, report_fail=failed_label)
print("Finish training of the GMM_HMM models for digits 0-9")
print(failed_label)
assert len(failed_label) != 0

Training model detect 1
[1] Improvement: 1228972.7118902816	Time (s): 0.2551
[2] Improvement: 1529.955194242415	Time (s): 0.2388
[3] Improvement: 5211.57258149609	Time (s): 0.2457
[4] Improvement: 6175.628565207357	Time (s): 0.2318
[5] Improvement: 5611.7893794361735	Time (s): 0.2455
[6] Improvement: 5745.156590818544	Time (s): 0.2374
[7] Improvement: 5219.652287456789	Time (s): 0.2261
[8] Improvement: 4963.210654206807	Time (s): 0.2307
[9] Improvement: 3928.178926367662	Time (s): 0.2337
[10] Improvement: 1984.3651323717786	Time (s): 0.2373
[11] Improvement: 1235.759042506339	Time (s): 0.232
[12] Improvement: 829.6454454570776	Time (s): 0.2277
[13] Improvement: 568.4569778447039	Time (s): 0.2319
[14] Improvement: 346.41134371911176	Time (s): 0.2306
[15] Improvement: 189.3590628564125	Time (s): 0.2418
[16] Improvement: 96.03325157077052	Time (s): 0.2325
[17] Improvement: 54.77494796458632	Time (s): 0.2307
[18] Improvement: 34.44800916186068	Time (s): 0.2334
[19] Improvement: 24.55397475

In [None]:
# Report per-label and overall recognition rates on the training data and on both test sets
print("Evaluate on train set")
evaluateModel(trainDataSet, hmmModels)
print("Evaluate on rest of FSDD set")
evaluateModel(testDataSet_fsdd, hmmModels)
print("Evaluate on wolfram test set")
evaluateModel(testDataSet_wolfram, hmmModels)

Evaluate on train set
0: 660/795 (0.8301886792452831)
1: 291/837 (0.34767025089605735)
2: 537/745 (0.7208053691275168)
3: 114/793 (0.1437578814627995)
4: 471/803 (0.5865504358655044)
5: 636/812 (0.7832512315270936)
6: 636/846 (0.75177304964539)
7: 399/789 (0.5057034220532319)
8: 105/795 (0.1320754716981132)
9: 668/784 (0.8520408163265306)
Final recognition rate is 56.47 %
Evaluate on rest of FSDD set
0: 198/211 (0.9383886255924171)
1: 94/208 (0.4519230769230769)
2: 169/214 (0.7897196261682243)
3: 33/218 (0.15137614678899083)
4: 154/214 (0.719626168224299)
5: 185/203 (0.9113300492610837)
6: 139/199 (0.6984924623115578)
7: 137/234 (0.5854700854700855)
8: 7/197 (0.03553299492385787)
9: 145/202 (0.7178217821782178)
Final recognition rate is 60.05 %
Evaluate on wolfram test set
0: 1373/1670 (0.8221556886227545)
1: 538/1625 (0.3310769230769231)
2: 1211/1714 (0.706534422403734)
3: 222/1645 (0.13495440729483282)
4: 912/1655 (0.5510574018126888)
5: 1246/1642 (0.758830694275274)
6: 1248/1624 (0.

In [None]:
# Save the models: upload each label's HMM as JSON into the Drive folder given by the hard-coded parent id.
# The "_3_" in the title presumably records the number of hidden states of this experiment - confirm.
for model_label in hmmModels.keys():
  file = drive.CreateFile({'title': f'hmm[{model_label}]_{n_mfcc_features}_3_{n_mixtures}[{experiment_id}].json', 'parents': [{'id': '1QPUr4vwYHu3n9iH3iQmnvDUt2Dgx4V3Y'}]})
  file.SetContentString(hmmModels[model_label].to_json())
  file.Upload()