In [None]:
from gmm import *

In [None]:
import os,glob
from tqdm import tqdm
import copy
import time

In [None]:
# One single-character class label per model. Each label is used as the
# filename prefix when globbing training audio and is compared against the
# label character read from each test feature path.
# (presumably "Z" = zero and "O" = oh -- TODO confirm against the dataset)
target = [label for label in "Z123456789O"]

### 1. Prepare data

In [None]:
# Extract MFCC features for all training utterances and cache them as one
# stacked array per target label in train_feats/<label>_mfcc.npy.
os.makedirs("train_feats", exist_ok=True)
for t in target:
    feat_path = os.path.join("train_feats", f"{t}_mfcc.npy")

    # Cache per output file, not per directory: if a previous run was
    # interrupted, only the targets that are still missing get recomputed.
    if os.path.isfile(feat_path):
        continue

    # Sorted so the row order of the stacked features does not depend on the
    # filesystem's directory listing order.
    files = sorted(glob.glob( os.path.join("train","*","*",t+"*.wav") ))
    if not files:
        # Stacking an empty list would fail with an unhelpful message anyway.
        raise FileNotFoundError(f"No training wav files found for target {t!r}")

    datas = [compute_mfcc_feat(fileName) for fileName in tqdm(files)]

    # np.row_stack is a deprecated alias of np.vstack.
    datas = np.vstack(datas)
    print(t,"Save to npy file",datas.shape)
    np.save(feat_path, datas)

# Extract MFCC features for every test utterance and cache each one as
# test_feats/<wav basename>.npy.
os.makedirs("test_feats", exist_ok=True)

files = sorted(glob.glob( os.path.join("test","*.wav") ))

for fileName in files:
    # Swap the extension with splitext instead of slicing a fixed number of
    # characters; the resulting name is the same for ".wav" inputs.
    stem = os.path.splitext(os.path.basename(fileName))[0]
    name = os.path.join("test_feats", stem + ".npy")

    # Cache per output file, not per directory, so an interrupted run resumes
    # with the utterances that are still missing.
    if os.path.isfile(name):
        continue

    feat = compute_mfcc_feat(fileName)
    print("Save to npy file",feat.shape,name)
    np.save(name,feat)

### 2. Initialize GMMs

In [None]:
# Build the label -> GMM mapping: load each model that already exists on disk
# and create (and save) a fresh one for every label whose model file is missing.
gmms = {}

os.makedirs("saved_models", exist_ok=True)
model_paths = {t: os.path.join("saved_models", t + ".gmm") for t in target}

# Decide per model file rather than per directory, so a partially populated
# saved_models/ directory no longer makes loading fail.
missing = [t for t in target if not os.path.isfile(model_paths[t])]

if missing:
    # All fresh models start from one shared GMM initialized on the pooled
    # training features of every target.
    # np.row_stack is a deprecated alias of np.vstack.
    allData = np.vstack(
        [np.load( os.path.join("train_feats", t + "_mfcc.npy") ) for t in target]
    )
    # NOTE(review): relies on initialize() returning the GMM object -- confirm in gmm.py.
    gmm = GMM(k=3).initialize(allData)

for t in target:
    if t in missing:
        # Deep copy so each target trains its own independent parameters.
        gmms[t] = copy.deepcopy(gmm)
        gmms[t].save(model_paths[t])
    else:
        gmms[t] = load_GMM(model_paths[t])

if missing:
    print("Initialize models done")
if len(missing) < len(target):
    print("Load pretrained models done")

### 3. Train GMMs

In [None]:
# Re-estimate each target's GMM on that target's cached training features for
# a fixed number of passes, logging the value returned by estimate_parallel
# (reported as the likelihood) and the wall-clock time of every pass.
for t in target:
    feats = np.load(os.path.join("train_feats", f"{t}_mfcc.npy"))
    model = gmms[t]

    for i in range(5):
        st = time.time()
        log_llh = model.estimate_parallel(feats, threads=5)
        elapsed = int(time.time() - st)
        print(f"Target: {t} , Iter: {i}, Likelihood: {log_llh:.6f}, Time Cost: {elapsed} s")

    # The model is written to disk only after all passes for this target finish.
    model.save(os.path.join("saved_models", t + ".gmm"))
    print(f"Saved model {t}.gmm done!")

### 4. Test GMMs

In [None]:
# Classify every cached test utterance with the GMM that scores it highest and
# report the overall accuracy.
correction_num = 0
error_num = 0
acc = 0.0

# Sorted so the per-utterance log appears in a stable order across runs.
testFiiles = sorted(glob.glob(os.path.join("test_feats","*.npy")))
numUtts = len(testFiiles)

for i,fp in enumerate(testFiiles):
    feats = np.load(fp)
    # The reference label is the 12th character from the end of the path.
    # NOTE(review): this assumes a fixed filename layout; a shorter name would
    # pick a character from the directory part or raise IndexError -- confirm.
    label = fp[-12]

    # One score per target, in the same order as `target`.
    scores = [gmms[t].calc_log_likelihood(feats) for t in target]
    # On ties, index() returns the first maximum, i.e. the earliest target.
    predict_target = target[scores.index(max(scores))]

    if predict_target == label:
        correction_num += 1
        result = "right"
    else:
        error_num += 1
        result = "wrong"
    print(f"{i}/{numUtts} utt: {fp} label:{label} predict:{predict_target} {result}")

# Guard the division: with no test feature files both counters are zero and
# the accuracy is undefined.
total_num = correction_num + error_num
if total_num:
    acc = correction_num / total_num
    print("Total accuracy:",acc)
else:
    print("No test feature files found in test_feats; accuracy not computed")