In [1]:
import os
import librosa
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout

Using TensorFlow backend.


In [2]:
def create_model(width=20, height=80, classes=10, lstm_units=128 * 4, dropout_rate=0.5):
    """Build and compile the LSTM classifier used for spoken-digit recognition.

    The network is a single LSTM layer followed by dropout and a softmax
    output layer, compiled with categorical cross-entropy loss and the Adam
    optimizer.

    Args:
        width: First dimension of one input sample. It is passed as the first
            entry of the LSTM ``input_shape`` (Keras treats that axis as the
            time-step axis).
        height: Second dimension of one input sample, passed as the second
            entry of ``input_shape`` (the per-step feature axis).
        classes: Number of output classes (units of the softmax layer).
        lstm_units: Number of units in the LSTM layer.
        dropout_rate: Fraction of LSTM outputs dropped during training.

    Returns:
        The compiled ``Sequential`` model. Its weights are freshly
        initialised; load trained weights separately.
    """
    print('建立模型...')
    model = Sequential()
    model.add(LSTM(lstm_units, input_shape=(width, height)))
    model.add(Dropout(dropout_rate))
    model.add(Dense(classes, activation='softmax'))
    model.summary()
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    print('建立模型成功...')

    return model

In [3]:
# Build the network with its default input shape, then try to restore the
# trained weights from disk.
model = create_model()

try:
    model.load_weights("../savemodel/speechModel.h5")
    print("加载模型成功")
except Exception as exc:
    # Catch Exception instead of using a bare except so that
    # KeyboardInterrupt / SystemExit still propagate, and show the cause so a
    # missing file or a shape mismatch can be told apart.
    # NOTE: when loading fails the model keeps its freshly initialised
    # weights, so predictions made afterwards are not meaningful.
    print("加载模型失败")
    print(repr(exc))

# Number of MFCC frames (columns) every sample must have; matches the default
# `height` that create_model() uses for the model input.
N_FRAMES = 80

wav_files = [
    '../../data/numbers_speech/0_Agnes_120.wav',
    '../../data/numbers_speech/8_Agnes_220.wav',
    '../../data/numbers_speech/4_Bruce_360.wav',
    '../../data/numbers_speech/2_Kathy_380.wav'
]

mfcc_batch_features = []
# Extract one fixed-size MFCC matrix per input file.
for input_file in wav_files:
    wave, sr = librosa.load(input_file, mono=True)
    # Pass y/sr by keyword: newer librosa releases no longer accept them
    # positionally, and the keyword form also works on older releases.
    mfcc = librosa.feature.mfcc(y=wave, sr=sr)
    # Force exactly N_FRAMES columns. Clips with more frames are truncated
    # first, because a negative pad width would make np.pad raise ValueError.
    # Shorter clips are zero-padded on the right; (0, 0) leaves the row axis
    # untouched.
    mfcc = mfcc[:, :N_FRAMES]
    mfcc = np.pad(mfcc, ((0, 0), (0, N_FRAMES - mfcc.shape[1])), mode='constant', constant_values=0)
    mfcc_batch_features.append(mfcc)

# Sequential.predict_classes() is gone from newer Keras releases; for a
# softmax output it was the argmax over the last axis of predict().
probabilities = model.predict(np.array(mfcc_batch_features))
predict = np.argmax(probabilities, axis=-1)

# Report the predicted class next to each input file.
for wav_path, predicted_digit in zip(wav_files, predict):
    print('声音文件路径：', wav_path)
    print('预测值声音值是：', predicted_digit)

建立模型...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 512)               1214464   
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                5130      
Total params: 1,219,594
Trainable params: 1,219,594
Non-trainable params: 0
_________________________________________________________________
建立模型成功...
加载模型成功
声音文件路径： ../../data/numbers_speech/0_Agnes_120.wav
预测值声音值是： 0
声音文件路径： ../../data/numbers_speech/8_Agnes_220.wav
预测值声音值是： 8
声音文件路径： ../../data/numbers_speech/4_Bruce_360.wav
预测值声音值是： 6
声音文件路径： ../../data/numbers_speech/2_Kathy_380.wav
预测值声音值是： 2
