In [1]:
import pyaudio
import time
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.python.ops import gen_audio_ops as audio_ops
from datetime import datetime

model = keras.models.load_model("fully_trained.model")

FORMAT = pyaudio.paFloat32
RATE = 16000
CHANNELS = 1
NOFFRAMES = 8000

audio = pyaudio.PyAudio()

info = audio.get_host_api_info_by_index(0)
numdevices = info.get('deviceCount')
for i in range(0, numdevices):
    if (audio.get_device_info_by_host_api_device_index(0, i).get('maxInputChannels')) > 0:
        print("Input Device id ", i, " - ",
              audio.get_device_info_by_host_api_device_index(0, i).get('name'))


samples = np.zeros((8000))


def callback(input_data, frame_count, time_info, flags):
    global samples
    # print("Got audio " + str(frame_count))
    new_samples = np.frombuffer(input_data, np.float32)
    samples = np.concatenate((samples, new_samples))
    samples = samples[-16000:]

    if len(samples) == 16000:
        start = time.perf_counter()
        # normalise the samples
        normalised = samples - np.mean(samples)
        max = np.max(normalised)
        if max > 0:
            normalised = normalised / max

        # create the spectrogram
        spectrogram = audio_ops.audio_spectrogram(
            np.reshape(normalised, (16000, 1)),
            window_size=320,
            stride=160, 
            magnitude_squared=True)
        # reduce the number of frequency bins in our spectrogram to a more sensible level
        spectrogram = tf.nn.pool(
            input=tf.expand_dims(spectrogram, -1),
            window_shape=[1, 6],
            strides=[1, 6],
            pooling_type='AVG',
            padding='SAME')
        # remove the first 1 index
        spectrogram = tf.squeeze(spectrogram, axis=0)
        spectrogram = np.log10(spectrogram + 1e-6)
        prediction = model.predict(np.reshape(spectrogram, (1, 99, 43, 1)))        
        
        index = 0
        max_value = 0
        for i in range(7):
            if prediction[0][i] > max_value:
                max_value = prediction[0][i]
                index = i
        
        dict_predict = {0 : 'forward', 1: 'backward', 2:'left', 3:'right', 4:'one', 5:'zero', 6:'invalid'}        
        if max_value > 0.95 and index != 6:
            print(
                f"{datetime.now().time()} - {dict_predict[index]}, and score: {max_value}")
        
        end = time.perf_counter()
        # print((end-start)*1000)

    return input_data, pyaudio.paContinue

Input Device id  0  -  Microsoft Sound Mapper - Input
Input Device id  1  -  Microphone Array (טכנולוגיית In


In [2]:
stream = audio.open(
    input_device_index=0,
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    stream_callback=callback,
    frames_per_buffer=NOFFRAMES)

stream.start_stream()
print("speak")

# wait for stream to finish (5)
while stream.is_active() and time.time:
    time.sleep(0.1)

speak
23:36:59.958387 - right, and score: 0.9834263324737549
23:37:05.969395 - backward, and score: 0.9812319874763489
23:37:30.462275 - zero, and score: 0.9911172389984131
23:37:31.957123 - one, and score: 0.9996234178543091
23:37:32.473829 - one, and score: 0.9999568462371826
23:37:34.953992 - zero, and score: 0.9978335499763489
23:37:39.972665 - zero, and score: 0.9800373315811157
23:37:50.476123 - right, and score: 0.9994290471076965
23:37:50.965646 - right, and score: 0.9977118968963623
23:37:53.961799 - right, and score: 0.9970663189888
23:37:54.457929 - right, and score: 0.996692419052124
23:38:01.958795 - left, and score: 0.9586244225502014
23:38:09.463391 - right, and score: 0.9899896383285522
23:38:11.464216 - one, and score: 0.9916074275970459
23:38:12.955885 - zero, and score: 0.9843147397041321
23:38:17.454766 - backward, and score: 0.9979459643363953
23:38:27.953012 - forward, and score: 0.9950396418571472
23:38:33.469741 - right, and score: 0.9998117089271545
23:38:34.45

KeyboardInterrupt: 

In [None]:
# stream.stop_stream()
# stream.close()
# p.terminate()
# print('done')
# plt.plot(decoded)
# plt.show()