# Test Audio Sender

This file is used for sending audio data to the device. It measures the accuracy based on individual audio files.

**Note that the software on the MCU must match this!**

In [35]:
AUDIO_FOLDER_PATH = "./audio"
POSITIVE_PATH = f"{AUDIO_FOLDER_PATH}/positive"
NEGATIVE_PATH = f"{AUDIO_FOLDER_PATH}/negative"

In [36]:
import numpy as np
import pandas as pd
import gc
import os
import librosa
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import serial
import struct

In [37]:
# Collecte positive audio files and their classes
positive_audio_files = []
positive_audio_file_classes = []
# Look each file in the audio folder
audio_class_folders = os.listdir(POSITIVE_PATH)
# Loop each class folder
for audio_class_folder in audio_class_folders:
    # Assemble full audio class folder path.
    audio_class_folder_path = os.path.join(POSITIVE_PATH, audio_class_folder)
    print("Processing class folder: ", audio_class_folder_path)
    # Get all files in the audio class folder
    audio_class_files = os.listdir(audio_class_folder_path)
    # Loop each audio file in the audio class folder
    for audio_class_file in audio_class_files:
        # Assemble full audio file path.
        audio_file_path = os.path.join(audio_class_folder_path, audio_class_file)
        # Append the audio file path to the positive_audio_files list
        positive_audio_files.append(audio_file_path)
        # Append the audio class to the audio_file_classes list
        positive_audio_file_classes.append(audio_class_folder)
print("positive_audio_files length: ", len(positive_audio_files[0:10]))
print("positive_audio_file_classes length: ", len(positive_audio_file_classes[0:10]))

Processing class folder:  ./audio/positive/commercial
Processing class folder:  ./audio/positive/car
positive_audio_files length:  4
positive_audio_file_classes length:  4


In [38]:
# Collecte negative audio files and their classes
negative_audio_files = []
negative_audio_file_classes = []
# Look each file in the audio folder
audio_class_folders = os.listdir(NEGATIVE_PATH)
# Loop each class folder
for audio_class_folder in audio_class_folders:
    # Assemble full audio class folder path.
    audio_class_folder_path = os.path.join(NEGATIVE_PATH, audio_class_folder)
    print("Processing class folder: ", audio_class_folder_path)
    # Get all files in the audio class folder
    audio_class_files = os.listdir(audio_class_folder_path)
    # Loop each audio file in the audio class folder
    for audio_class_file in audio_class_files:
        # Assemble full audio file path.
        audio_file_path = os.path.join(audio_class_folder_path, audio_class_file)
        # Append the audio file path to the negative_audio_files list
        negative_audio_files.append(audio_file_path)
        # Append the audio class to the audio_file_classes list
        negative_audio_file_classes.append(audio_class_folder)
print("negative_audio_files length: ", len(negative_audio_files[0:10]))
print("negative_audio_file_classes length: ", len(negative_audio_file_classes[0:10]))

Processing class folder:  ./audio/negative/background
negative_audio_files length:  1
negative_audio_file_classes length:  1


In [39]:
# Shuffle the audio files and classes with the same seed.
seed = 42
np.random.seed(seed)
np.random.shuffle(positive_audio_files)

np.random.seed(seed)
np.random.shuffle(positive_audio_file_classes)

np.random.seed(seed)
np.random.shuffle(negative_audio_files)

np.random.seed(seed)
np.random.shuffle(negative_audio_file_classes)

In [40]:
# Hot end code the labels.
label_encoder = LabelEncoder()
positive_audio_file_classes_categorical = to_categorical(label_encoder.fit_transform(positive_audio_file_classes))
print("Example of audio_file_classes_categorial: ", positive_audio_file_classes_categorical[0:10])

Example of audio_file_classes_categorial:  [[0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]]


In [41]:
# Connect to MCU via serial.
print("Configuring serial port...")
ser = serial.Serial(
    port='/dev/ttyACM0',  # Change this to your actual port, e.g., 'COM3' on Windows, '/dev/ttyS0' on Linux
    baudrate=115200,       # Set baud rate to 115200
    bytesize=serial.EIGHTBITS,
    parity=serial.PARITY_NONE,
    stopbits=serial.STOPBITS_ONE,
    timeout=1              # Set timeout for reading
)

Configuring serial port...


In [42]:
if ser.is_open:
    print(f"Serial port {ser.port} opened at {ser.baudrate} baud.")

Serial port /dev/ttyACM0 opened at 115200 baud.


In [43]:
def calc_accuracy(y_true, y_pred):
    success = 0
    failure = 0
    for i in range(len(y_pred)):
        if int(y_pred[i]) == int(np.argmax(y_true[i])):
            success += 1
        else:
            failure += 1
    res = round(success / (success + failure), 4)
    return res

In [44]:
def streamAudioFile(file_path):
    # Check file format.
    if not file_path.endswith('.wav'):
        print("Error: Only .wav files are supported.")
        return
    # Load the audio file.
    audio_data, sr = librosa.load(file_path, sr=None)
    # Convert sample rate to 16kHz.
    if sr != 16000:
        audio_data = librosa.resample(y=audio_data, orig_sr=sr, target_sr=16000)
        sr = 16000
    # Convert the audio data to a numpy array.
    audio_data = np.array(audio_data, dtype=np.float32)
    # Send the audio data to the MCU.
    for j in range(len(audio_data)):
        #data_to_send = struct.pack('f', audio_data[j])
        #ser.write(data_to_send)
        print("Sending data to MCU: ", audio_data[j])

In [46]:

results = []
total_positive_audio_files = len(positive_audio_files)

# Loop each audio file.
negative_audio_file_pointer = 0
for i in range(total_positive_audio_files):
    # Get audio file path.
    audio_file = positive_audio_files[i]
    # Stream positive audio file to MCU.
    streamAudioFile(audio_file)
    # Stream negative audio file to MCU.
    streamAudioFile(negative_audio_files[negative_audio_file_pointer])
    # Increment the negative audio file pointer.
    negative_audio_file_pointer += 1
    # Check if the negative audio file pointer is out of range.
    if negative_audio_file_pointer >= len(negative_audio_files):
        negative_audio_file_pointer = 0
    # Read the response from the MCU.
    try:
        response = ser.readline().decode('utf-8').strip()
    except Exception as e:
        print(f"Error reading response: {e}")
        break
    if not response:
        print("No response from MCU.")
        break
    # Append the response to the results list.
    results.append(response)
    print(f"Guess: {response} - Class: {np.argmax(positive_audio_file_classes[i])} Accuracy: {calc_accuracy(positive_audio_file_classes_categorical, results)} ({i}/{total_positive_audio_files})")

Sending data to MCU:  -0.005573388
Sending data to MCU:  -0.01613151
Sending data to MCU:  0.0049564764
Sending data to MCU:  0.038336165
Sending data to MCU:  0.07280128
Sending data to MCU:  0.09647471
Sending data to MCU:  0.039379515
Sending data to MCU:  -0.05691777
Sending data to MCU:  -0.092007026
Sending data to MCU:  -0.09240148
Sending data to MCU:  -0.1250557
Sending data to MCU:  -0.13289186
Sending data to MCU:  -0.13876301
Sending data to MCU:  -0.17169237
Sending data to MCU:  -0.25505704
Sending data to MCU:  -0.35213766
Sending data to MCU:  -0.3926767
Sending data to MCU:  -0.36168054
Sending data to MCU:  -0.32953233
Sending data to MCU:  -0.351603
Sending data to MCU:  -0.32957205
Sending data to MCU:  -0.2799457
Sending data to MCU:  -0.25994265
Sending data to MCU:  -0.21545911
Sending data to MCU:  -0.20576206
Sending data to MCU:  -0.19279438
Sending data to MCU:  -0.141763
Sending data to MCU:  -0.09436169
Sending data to MCU:  -0.096418336
Sending data to MCU

In [26]:
# Close the serial port
ser.close()