In [1]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    print('data_dir: ', data_dir)
    print(f'mfcc.shape: {np.array(mfcc).shape}')
    return mfcc

def clustering(X, n_clusters=10):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans  


In [2]:
class_names = ["cothe", "khong", "nguoi", "toi", "va" , "test_cothe" , "test_khong" , "test_nguoi", "test_toi", "test_va" ]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("data", cname))
 
# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)    

models = {}

Load cothe dataset
data_dir:  data\cothe
mfcc.shape: (70,)
Load khong dataset
data_dir:  data\khong
mfcc.shape: (69,)
Load nguoi dataset
data_dir:  data\nguoi
mfcc.shape: (70,)
Load toi dataset
data_dir:  data\toi
mfcc.shape: (70,)
Load va dataset
data_dir:  data\va
mfcc.shape: (70,)
Load test_cothe dataset
data_dir:  data\test_cothe
mfcc.shape: (30,)
Load test_khong dataset
data_dir:  data\test_khong
mfcc.shape: (31,)
Load test_nguoi dataset
data_dir:  data\test_nguoi
mfcc.shape: (30,)
Load test_toi dataset
data_dir:  data\test_toi
mfcc.shape: (30,)
Load test_va dataset
data_dir:  data\test_va
mfcc.shape: (30,)
vectors (16358, 36)
centers (10, 36)
centers (10, 36)


In [3]:
models = {}
original_dataset = {}

In [4]:
#từ
original_dataset['cothe'] = dataset['cothe'].copy()
cname = 'cothe'
class_vectors = dataset[cname]
# convert all vectors to the cluster index
# dataset['one'] = [O^1, ... O^R] , O^r: the r-th recorded wav file 
# O^r = (c1, c2, ... ct, ... cT) , c_i: the i-th frame in the r-th observation ( or the r-th wav file )
# O^r size T x 1
dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in original_dataset[cname]])


hmm = hmmlearn.hmm.MultinomialHMM(
    n_components=13, random_state=0, n_iter=1000, verbose=True,
    params='te',
    init_params='e'
)
hmm.startprob_=np.array([0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0])
hmm.transmat_=np.array([
    [0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
    [0.0,0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
    [0.0,0.0,0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
    [0.0,0.0,0.0,0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
    [0.0,0.0,0.0,0.0,0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0,0.0],
    [0.0,0.0,0.0,0.0,0.0,0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.3,0.1,0.0,0.0,0.0,0.0],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.3,0.1,0.0,0.0,0.0],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.3,0.1,0.0,0.0],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.3,0.1,0.0],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.3,0.1],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
])
if cname[:4] != 'test':
    X = np.concatenate(dataset[cname])
    lengths = list([len(x) for x in dataset[cname]])
    print("training class", cname)
    print(X.shape, lengths, len(lengths))
    hmm.fit(X, lengths=lengths)
    models[cname] = hmm
dataset['test_cothe'] = list([kmeans.predict(v).reshape(-1,1) for v in dataset['test_cothe']])

training class cothe
(2482, 1) [52, 35, 47, 33, 70, 72, 48, 60, 56, 85, 27, 50, 19, 22, 24, 19, 24, 41, 17, 28, 27, 19, 45, 23, 20, 24, 23, 21, 21, 34, 19, 21, 24, 57, 16, 36, 32, 22, 26, 36, 33, 34, 30, 38, 66, 35, 27, 34, 37, 30, 29, 29, 32, 32, 25, 55, 32, 25, 41, 30, 29, 27, 30, 32, 32, 32, 58, 37, 66, 70] 70


         1       -5615.9237             +nan
         2       -3665.4153       +1950.5084
         3       -3065.5611        +599.8542
         4       -2862.5033        +203.0579
         5       -2786.3160         +76.1873
         6       -2752.9204         +33.3956
         7       -2735.8346         +17.0858
         8       -2725.7506         +10.0840
         9       -2719.1210          +6.6296
        10       -2714.2136          +4.9074
        11       -2710.7717          +3.4418
        12       -2708.5016          +2.2702
        13       -2707.0841          +1.4175
        14       -2706.2425          +0.8416
        15       -2705.7511          +0.4914
        16       -2705.4603          +0.2909
        17       -2705.2854          +0.1749
        18       -2705.1768          +0.1086
        19       -2705.1047          +0.0721
        20       -2705.0531          +0.0515
        21       -2705.0143          +0.0388
        22       -2704.9841          +0.0302
        23

In [5]:
#từ
original_dataset['toi'] = dataset['toi'].copy()
cname = 'toi'
class_vectors = dataset[cname]
# convert all vectors to the cluster index
# dataset['one'] = [O^1, ... O^R] , O^r: the r-th recorded wav file 
# O^r = (c1, c2, ... ct, ... cT) , c_i: the i-th frame in the r-th observation ( or the r-th wav file )
# O^r size T x 1

dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in original_dataset[cname]])


hmm = hmmlearn.hmm.MultinomialHMM(
    n_components= 8, random_state=0, n_iter=1100, verbose=True,
    params='te',
    init_params='e'
)
hmm.startprob_ = np.array([0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0,])
hmm.transmat_ = np.array([
        [0.5,0.3,0.2,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.5,0.3,0.2,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.5,0.3,0.2,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.5,0.3,0.2,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.5,0.3,0.2,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.5,0.3,0.2],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.4],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],       
    ])

if cname[:4] != 'test':
    X = np.concatenate(dataset[cname])
    lengths = list([len(x) for x in dataset[cname]])
    print("training class", cname)
    print(X.shape, lengths, len(lengths))
    hmm.fit(X, lengths=lengths)
    models[cname] = hmm
dataset['test_toi'] = list([kmeans.predict(v).reshape(-1,1) for v in dataset['test_toi']])

         1       -3233.3739             +nan
         2       -2029.9727       +1203.4012


training class toi
(1375, 1) [12, 16, 14, 14, 15, 19, 13, 16, 14, 17, 13, 15, 14, 9, 14, 14, 13, 15, 16, 15, 13, 15, 15, 14, 13, 38, 46, 10, 17, 23, 22, 9, 11, 18, 18, 35, 14, 15, 31, 15, 20, 12, 14, 12, 14, 18, 37, 18, 15, 16, 21, 14, 19, 25, 15, 15, 25, 24, 30, 24, 22, 17, 49, 28, 79, 26, 12, 40, 16, 18] 70


         3       -1765.5397        +264.4330
         4       -1656.4370        +109.1027
         5       -1612.2693         +44.1677
         6       -1591.9718         +20.2975
         7       -1581.2495         +10.7222
         8       -1573.6622          +7.5874
         9       -1566.5296          +7.1326
        10       -1558.0596          +8.4700
        11       -1548.2396          +9.8199
        12       -1538.1697         +10.0699
        13       -1529.3032          +8.8666
        14       -1522.6412          +6.6620
        15       -1517.0887          +5.5525
        16       -1513.3634          +3.7253
        17       -1511.1371          +2.2263
        18       -1510.1756          +0.9615
        19       -1509.7836          +0.3920
        20       -1509.5925          +0.1911
        21       -1509.4831          +0.1094
        22       -1509.4143          +0.0687
        23       -1509.3683          +0.0460
        24       -1509.3357          +0.0327
        25

In [6]:
#từ
original_dataset['khong'] = dataset['khong'].copy()
cname = 'khong'
class_vectors = dataset[cname]
# convert all vectors to the cluster index
# dataset['one'] = [O^1, ... O^R] , O^r: the r-th recorded wav file 
# O^r = (c1, c2, ... ct, ... cT) , c_i: the i-th frame in the r-th observation ( or the r-th wav file )
# O^r size T x 1
dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in original_dataset[cname]])


hmm = hmmlearn.hmm.MultinomialHMM(
    n_components=13, random_state=0, n_iter=1000, verbose=True,
    params='te',
    init_params='e'
)
hmm.startprob_=np.array([0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0])
hmm.transmat_=np.array([
    [0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
    [0.0,0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
    [0.0,0.0,0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
    [0.0,0.0,0.0,0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
    [0.0,0.0,0.0,0.0,0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0,0.0],
    [0.0,0.0,0.0,0.0,0.0,0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.3,0.1,0.0,0.0,0.0,0.0],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.3,0.1,0.0,0.0,0.0],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.3,0.1,0.0,0.0],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.3,0.1,0.0],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.3,0.1],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
])
if cname[:4] != 'test':
    X = np.concatenate(dataset[cname])
    lengths = list([len(x) for x in dataset[cname]])
    print("training class", cname)
    print(X.shape, lengths, len(lengths))
    hmm.fit(X, lengths=lengths)
    models[cname] = hmm
dataset['test_khong'] = list([kmeans.predict(v).reshape(-1,1) for v in dataset['test_khong']])

         1       -7855.3468             +nan
         2       -3904.0268       +3951.3200
         3       -3437.8730        +466.1538


training class khong
(4017, 1) [26, 101, 29, 31, 26, 34, 25, 15, 22, 46, 30, 33, 101, 25, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 38, 19, 27, 22, 31, 25, 31, 26, 27, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 23, 25, 23, 45, 26, 19, 23, 30, 24, 101, 28, 28, 27, 25, 31, 32, 23, 22, 18, 28, 101, 101, 101] 69


         4       -2881.1423        +556.7306
         5       -2490.4969        +390.6454
         6       -2348.9101        +141.5869
         7       -2195.5921        +153.3179
         8       -2166.9032         +28.6890
         9       -2160.7185          +6.1847
        10       -2157.5095          +3.2090
        11       -2155.3579          +2.1516
        12       -2153.8910          +1.4669
        13       -2152.9966          +0.8944
        14       -2152.5337          +0.4628
        15       -2152.3107          +0.2230
        16       -2152.1930          +0.1177
        17       -2152.1183          +0.0747
        18       -2152.0613          +0.0571
        19       -2152.0113          +0.0500
        20       -2151.9634          +0.0480
        21       -2151.9129          +0.0504
        22       -2151.8513          +0.0616
        23       -2151.7561          +0.0952
        24       -2151.5772          +0.1789
        25       -2151.2545          +0.3227
        26

In [7]:
#từ
original_dataset['nguoi'] = dataset['nguoi'].copy()
cname = 'nguoi'
class_vectors = dataset[cname]
# convert all vectors to the cluster index
# dataset['one'] = [O^1, ... O^R] , O^r: the r-th recorded wav file 
# O^r = (c1, c2, ... ct, ... cT) , c_i: the i-th frame in the r-th observation ( or the r-th wav file )
# O^r size T x 1
dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in original_dataset[cname]])


hmm = hmmlearn.hmm.MultinomialHMM(
    n_components=13, random_state=0, n_iter=1000, verbose=True,
    params='te',
    init_params='e'
)
hmm.startprob_=np.array([0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0])
hmm.transmat_=np.array([
    [0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
    [0.0,0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
    [0.0,0.0,0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
    [0.0,0.0,0.0,0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
    [0.0,0.0,0.0,0.0,0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0,0.0],
    [0.0,0.0,0.0,0.0,0.0,0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.3,0.1,0.0,0.0,0.0,0.0],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.3,0.1,0.0,0.0,0.0],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.3,0.1,0.0,0.0],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.3,0.1,0.0],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.3,0.1],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
    [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
])
if cname[:4] != 'test':
    X = np.concatenate(dataset[cname])
    lengths = list([len(x) for x in dataset[cname]])
    print("training class", cname)
    print(X.shape, lengths, len(lengths))
    hmm.fit(X, lengths=lengths)
    models[cname] = hmm
dataset['test_nguoi'] = list([kmeans.predict(v).reshape(-1,1) for v in dataset['test_nguoi']])

         1       -4248.9782             +nan
         2       -2418.7435       +1830.2347
         3       -1968.8261        +449.9174


training class nguoi
(1887, 1) [53, 36, 38, 47, 20, 44, 22, 48, 27, 37, 34, 33, 36, 25, 33, 18, 18, 32, 24, 25, 22, 31, 55, 26, 39, 18, 25, 22, 22, 15, 16, 24, 24, 51, 35, 24, 22, 29, 15, 14, 21, 35, 18, 20, 30, 16, 21, 34, 14, 32, 22, 21, 23, 26, 17, 22, 27, 17, 23, 24, 18, 10, 13, 27, 15, 19, 44, 29, 35, 35] 70


         4       -1808.2991        +160.5269
         5       -1746.7590         +61.5402
         6       -1712.2525         +34.5065
         7       -1689.9159         +22.3366
         8       -1677.7388         +12.1771
         9       -1670.0494          +7.6894
        10       -1663.1049          +6.9444
        11       -1657.1936          +5.9113
        12       -1652.3543          +4.8393
        13       -1648.2475          +4.1069
        14       -1645.0390          +3.2085
        15       -1642.8696          +2.1693
        16       -1641.4493          +1.4204
        17       -1640.4382          +1.0111
        18       -1639.6565          +0.7817
        19       -1639.0239          +0.6326
        20       -1638.4972          +0.5268
        21       -1638.0457          +0.4515
        22       -1637.6452          +0.4005
        23       -1637.2760          +0.3691
        24       -1636.9229          +0.3532
        25       -1636.5733          +0.3495
        26

In [8]:
#từ
original_dataset['va'] = dataset['va'].copy()
cname = 'va'
class_vectors = dataset[cname]
# convert all vectors to the cluster index
# dataset['one'] = [O^1, ... O^R] , O^r: the r-th recorded wav file 
# O^r = (c1, c2, ... ct, ... cT) , c_i: the i-th frame in the r-th observation ( or the r-th wav file )
# O^r size T x 1
dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in original_dataset[cname]])


hmm = hmmlearn.hmm.MultinomialHMM(
    n_components=5, random_state=0, n_iter=1000, verbose=True,
    params='te',
    init_params='e'
)
hmm.startprob_=np.array([0.7,0.2,0.1,0.0,0.0])
hmm.transmat_=np.array([
    [0.7,0.2,0.1,0.0,0.0,],
    [0.0,0.7,0.2,0.1,0.0,],
    [0.0,0.0,0.7,0.2,0.1,],
    [0.0,0.0,0.0,0.6,0.4,],
    [0.0,0.0,0.0,0.0,1.0,],
])
if cname[:4] != 'test':
    X = np.concatenate(dataset[cname])
    lengths = list([len(x) for x in dataset[cname]])
    print("training class", cname)
    print(X.shape, lengths, len(lengths))
    hmm.fit(X, lengths=lengths)
    models[cname] = hmm
dataset['test_va'] = list([kmeans.predict(v).reshape(-1,1) for v in dataset['test_va']])

         1       -2950.2600             +nan
         2       -2432.1076        +518.1524
         3       -2215.0956        +217.0120
         4       -2058.1837        +156.9119
         5       -2005.9496         +52.2341
         6       -1977.4169         +28.5328
         7       -1961.6650         +15.7519
         8       -1953.9192          +7.7458


training class va
(1385, 1) [19, 31, 21, 24, 31, 19, 17, 27, 22, 21, 16, 31, 23, 13, 23, 16, 17, 20, 16, 16, 26, 19, 36, 15, 18, 20, 13, 11, 14, 30, 17, 23, 16, 17, 21, 16, 14, 23, 15, 13, 16, 15, 17, 26, 23, 24, 12, 20, 21, 30, 23, 12, 20, 21, 17, 21, 38, 13, 15, 12, 12, 12, 14, 13, 25, 25, 21, 29, 18, 20] 70


         9       -1950.9077          +3.0116
        10       -1949.7669          +1.1407
        11       -1949.1924          +0.5745
        12       -1948.8354          +0.3569
        13       -1948.5853          +0.2501
        14       -1948.3941          +0.1912
        15       -1948.2383          +0.1558
        16       -1948.1061          +0.1322
        17       -1947.9912          +0.1149
        18       -1947.8899          +0.1013
        19       -1947.7997          +0.0901
        20       -1947.7188          +0.0809
        21       -1947.6454          +0.0734
        22       -1947.5779          +0.0675
        23       -1947.5150          +0.0629
        24       -1947.4554          +0.0596
        25       -1947.3978          +0.0576
        26       -1947.3411          +0.0567
        27       -1947.2842          +0.0569
        28       -1947.2260          +0.0582
        29       -1947.1652          +0.0607
        30       -1947.1007          +0.0645
        31

In [12]:
dataset['test_cothe'] = list([kmeans.predict(v).reshape(-1,1) for v in dataset['test_cothe']])
dataset['test_toi'] = list([kmeans.predict(v).reshape(-1,1) for v in dataset['test_toi']])
dataset['test_khong'] = list([kmeans.predict(v).reshape(-1,1) for v in dataset['test_khong']])
dataset['test_nguoi'] = list([kmeans.predict(v).reshape(-1,1) for v in dataset['test_nguoi']])
dataset['test_va'] = list([kmeans.predict(v).reshape(-1,1) for v in dataset['test_va']])

ValueError: Incorrect number of features. Got 1 features, expected 36

In [9]:

print("Testing")
for true_cname in class_names:
    for O in dataset[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
        print(true_cname, score)

Testing
cothe {'cothe': -73.58256792434165, 'toi': -341.7469982106616, 'khong': -634.9563595697565, 'nguoi': -inf, 'va': -131.1754626096717}
cothe {'cothe': -41.38104419986525, 'toi': -304.18249879214056, 'khong': -73.33619162043618, 'nguoi': -474.2436146488009, 'va': -83.69506287236474}
cothe {'cothe': -47.51649528793402, 'toi': -341.0796865504652, 'khong': -112.34656110297651, 'nguoi': -inf, 'va': -71.70271736459557}
cothe {'cothe': -37.50620196125571, 'toi': -306.0141908022296, 'khong': -77.97145671050824, 'nguoi': -766.0772876820344, 'va': -62.070437791645276}
cothe {'cothe': -90.83221433633506, 'toi': -246.58264584205844, 'khong': -314.2468074680819, 'nguoi': -inf, 'va': -132.55632507809761}
cothe {'cothe': -60.6563976936075, 'toi': -inf, 'khong': -255.25984642713948, 'nguoi': -inf, 'va': -261.428794054414}
cothe {'cothe': -44.25527984987748, 'toi': -245.4253844185153, 'khong': -105.50593403490737, 'nguoi': -521.4113982227358, 'va': -95.9544660879655}
cothe {'cothe': -51.954359374

In [10]:
import operator

print("Testing")

class_names = ["test_cothe","test_toi", "test_khong", "test_nguoi", "test_va"]
for true_cname in class_names:
    index = 0
    count = 0;
    total = 0;
    print("-----------------------")
    for O in dataset[true_cname]:
        index+=1
        total+=1
        pred = max(score.items(), key=operator.itemgetter(1))[0]
        if(true_cname[5:] == pred): count+=1
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
        print(true_cname, pred, index)
    print(count / total * 100)

Testing
-----------------------
test_cothe toi 1
test_cothe khong 2
test_cothe cothe 3
test_cothe cothe 4
test_cothe cothe 5
test_cothe cothe 6
test_cothe cothe 7
test_cothe cothe 8
test_cothe va 9
test_cothe va 10
test_cothe nguoi 11
test_cothe cothe 12
test_cothe nguoi 13
test_cothe cothe 14
test_cothe cothe 15
test_cothe cothe 16
test_cothe cothe 17
test_cothe cothe 18
test_cothe cothe 19
test_cothe nguoi 20
test_cothe cothe 21
test_cothe cothe 22
test_cothe cothe 23
test_cothe cothe 24
test_cothe cothe 25
test_cothe cothe 26
test_cothe cothe 27
test_cothe khong 28
test_cothe toi 29
test_cothe nguoi 30
66.66666666666666
-----------------------
test_toi cothe 1
test_toi toi 2
test_toi va 3
test_toi va 4
test_toi toi 5
test_toi toi 6
test_toi va 7
test_toi toi 8
test_toi toi 9
test_toi toi 10
test_toi va 11
test_toi toi 12
test_toi toi 13
test_toi va 14
test_toi toi 15
test_toi toi 16
test_toi va 17
test_toi nguoi 18
test_toi nguoi 19
test_toi toi 20
test_toi cothe 21
test_toi toi 22


In [12]:
print("Testing")
for true_cname in class_names:
    for O in dataset[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
        print(true_cname, score)

Testing
test_cothe {'cothe': -40.957090626264446, 'toi': -80.49862474428497, 'khong': -26.776424334840396, 'nguoi': -1037.77446214833, 'va': -34.552256571739}
test_cothe {'cothe': -43.66311519277686, 'toi': -181.61553783355495, 'khong': -78.73454828159339, 'nguoi': -174.64026715556204, 'va': -65.64870880975583}
test_cothe {'cothe': -65.70250188091644, 'toi': -257.1453924928715, 'khong': -173.8373540115144, 'nguoi': -400.7056397194441, 'va': -98.38355391412611}
test_cothe {'cothe': -42.82767780129281, 'toi': -258.812400340808, 'khong': -105.29640208082192, 'nguoi': -inf, 'va': -104.40575873431312}
test_cothe {'cothe': -30.526846167504132, 'toi': -160.6042802532979, 'khong': -64.56717005095868, 'nguoi': -277.5411723847018, 'va': -55.72042576101428}
test_cothe {'cothe': -23.384703429128226, 'toi': -44.129743051119654, 'khong': -50.52557133487596, 'nguoi': -40.337394911884765, 'va': -31.09944551057929}
test_cothe {'cothe': -21.453875893428506, 'toi': -53.26680583239012, 'khong': -54.203653