In [1]:
import os
import numpy as np
import scipy.io.wavfile as wav
import python_speech_features as sf
from hmmlearn import hmm
import csv as csv

In [2]:
# mfcc
def get_mfcc(file_path):
    '''
    :param file_path: 音频文件路径
    :return: 一个mfcc特征矩阵
    '''
    rate, data = wav.read(file_path)
    wav_features = sf.mfcc(signal=data, samplerate=rate, appendEnergy=True, nfft=2048)
    d_mfcc_feat1 = sf.delta(wav_features, 1) # 一阶差分
    d_mfcc_feat2 = sf.delta(wav_features, 2) # 二阶差分
    feature = np.hstack((wav_features, d_mfcc_feat1, d_mfcc_feat2))
    return feature

def get_features(csv_file_path):
    file =  open(csv_file_path)
    folder = os.path.split(csv_file_path)[0]
    file_data = csv.DictReader(file)
    result = {}
    for item in file_data:
        mfcc = get_mfcc(os.path.join(folder,item['path']))
        if item['species'] not in result:
            result[item['species']] = []
        result[item['species']].append(mfcc)
    return result

In [4]:
train = get_features('wakins/test.csv')

models = {}
for label, mfccs in train.items():
    sequence = []
    train_mat = np.array([])
    for mfcc in mfccs:
        sequence.append(mfcc.shape[0])
        if(train_mat.ndim == 1):
            train_mat = mfcc
        else:
            train_mat = np.append(train_mat, mfcc, axis=0)
    model = hmm.GaussianHMM(n_components=12, n_iter=3)
    model = model.fit(train_mat, sequence)
    models[label] = model

  rate, data = wav.read(file_path)


In [6]:
test = get_features('wakins/test.csv')

preds_num = 0
right_preds_num = 0

for true_label, mfccs in test.items():
    for mfcc in mfccs:
        best_score = float('-inf')
        preds_num += 1
        for label, model in models.items():
            score = model.score(mfcc)
            if score > best_score:
                best_score = score
                pred_label = label
        if pred_label == true_label:
            right_preds_num += 1

  rate, data = wav.read(file_path)


In [7]:
preds_num

474

In [8]:
right_preds_num

464