In [1]:
import pyaudio
import time
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.python.ops import gen_audio_ops as audio_ops
from datetime import datetime

model = keras.models.load_model("fully_trained.model")

FORMAT = pyaudio.paFloat32
RATE = 16000
CHANNELS = 1
NOFFRAMES = 8000

audio = pyaudio.PyAudio()

info = audio.get_host_api_info_by_index(0)
numdevices = info.get('deviceCount')
for i in range(0, numdevices):
    if (audio.get_device_info_by_host_api_device_index(0, i).get('maxInputChannels')) > 0:
        print("Input Device id ", i, " - ",
              audio.get_device_info_by_host_api_device_index(0, i).get('name'))


samples = np.zeros((8000))


def callback(input_data, frame_count, time_info, flags):
    global samples
    # print("Got audio " + str(frame_count))
    new_samples = np.frombuffer(input_data, np.float32)
    samples = np.concatenate((samples, new_samples))
    samples = samples[-16000:]

    if len(samples) == 16000:
        start = time.perf_counter()
        # normalise the samples
        normalised = samples - np.mean(samples)
        max = np.max(normalised)
        if max > 0:
            normalised = normalised / max

        # create the spectrogram
        spectrogram = audio_ops.audio_spectrogram(
            np.reshape(normalised, (16000, 1)),
            window_size=320,
            stride=160, 
            magnitude_squared=True)
        # reduce the number of frequency bins in our spectrogram to a more sensible level
        spectrogram = tf.nn.pool(
            input=tf.expand_dims(spectrogram, -1),
            window_shape=[1, 6],
            strides=[1, 6],
            pooling_type='AVG',
            padding='SAME')
        # remove the first 1 index
        spectrogram = tf.squeeze(spectrogram, axis=0)
        spectrogram = np.log10(spectrogram + 1e-6)
        prediction = model  .predict(np.reshape(spectrogram, (1, 99, 43, 1)))
        if prediction[0][0] > 0.95:
            print(
                f"{datetime.now().time()} - Go !  {prediction[0][0]}")
        end = time.perf_counter()
        # print((end-start)*1000)

    return input_data, pyaudio.paContinue

Input Device id  0  -  Microsoft Sound Mapper - Input
Input Device id  1  -  Microphone Array (טכנולוגיית In


In [2]:
stream = audio.open(
    input_device_index=0,
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    stream_callback=callback,
    frames_per_buffer=NOFFRAMES)

stream.start_stream()
print("speak")

# wait for stream to finish (5)
while stream.is_active() and time.time:
    time.sleep(0.1)

speak
21:34:09.930474 - Go !  0.9634836912155151
21:34:24.958687 - Go !  0.9640662670135498
21:34:29.919161 - Go !  0.9523192048072815
21:34:34.427145 - Go !  0.9595927000045776
21:34:51.434890 - Go !  0.9987392425537109
21:34:51.959317 - Go !  0.9990761280059814
21:35:11.438317 - Go !  0.9732465744018555
21:35:15.924390 - Go !  0.9885846972465515
21:35:16.436315 - Go !  0.9987018704414368
21:35:22.473810 - Go !  0.9850963354110718
21:35:29.432153 - Go !  0.9798650741577148
21:35:35.931709 - Go !  0.9900891184806824
21:36:00.430118 - Go !  0.9795277118682861
21:36:01.930002 - Go !  0.9862238168716431
21:36:19.453315 - Go !  0.991342306137085


KeyboardInterrupt: 

10:54:30.769536 - Go !  0.9715851545333862
10:55:06.277274 - Go !  0.9987924695014954
10:55:06.773880 - Go !  0.9941864609718323
10:55:09.772790 - Go !  0.966841459274292
10:56:43.978472 - Go !  0.9867025017738342
10:56:56.939101 - Go !  0.9996706247329712
10:56:57.459991 - Go !  0.9997084140777588
10:57:00.956611 - Go !  0.9842497110366821
10:57:01.462622 - Go !  0.9660252332687378
10:57:39.116482 - Go !  0.9896805286407471
10:58:21.613332 - Go !  0.9522767066955566
10:59:22.564377 - Go !  0.992982029914856
10:59:23.043281 - Go !  0.9539626836776733
11:00:05.536758 - Go !  0.9641784429550171
11:00:17.527707 - Go !  0.9556497931480408
11:02:53.537961 - Go !  0.9652309417724609
11:02:54.036464 - Go !  0.9840492010116577
11:03:49.541174 - Go !  0.9723050594329834
11:03:50.036259 - Go !  0.9974435567855835
11:03:51.033951 - Go !  0.9833171367645264
11:04:27.536846 - Go !  0.9908380508422852
11:04:56.540494 - Go !  0.9577382802963257
11:06:30.538795 - Go !  0.9582986831665039
11:06:31.0347

In [None]:
# stream.stop_stream()
# stream.close()
# p.terminate()
# print('done')
# plt.plot(decoded)
# plt.show()