# Test Audio Sender

This notebook streams audio files to the MCU over a serial connection, sample by sample, and measures the classification accuracy on a per-file basis.

**Note that the software on the MCU must match the serial protocol used in this notebook!**

In [1]:
# Root folder of the test audio; positive and negative samples live in
# separate sub-folders underneath it.
AUDIO_FOLDER_PATH = "./audio"
POSITIVE_PATH = AUDIO_FOLDER_PATH + "/positive"
NEGATIVE_PATH = AUDIO_FOLDER_PATH + "/negative"

In [2]:
import numpy as np
import pandas as pd
import gc
import os
import librosa
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import serial
import struct

2025-04-10 11:05:16.330853: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-10 11:05:16.334643: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-10 11:05:16.345073: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744275916.363002   11199 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744275916.368199   11199 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1744275916.381451   11199 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linkin

In [4]:
# Gather every positive audio file together with its class label.
# Each sub-folder of POSITIVE_PATH is one class, named after the folder.
positive_audio_files = []
positive_audio_file_classes = []
audio_class_folders = os.listdir(POSITIVE_PATH)
for audio_class_folder in audio_class_folders:
    audio_class_folder_path = os.path.join(POSITIVE_PATH, audio_class_folder)
    print("Processing class folder: ", audio_class_folder_path)
    audio_class_files = os.listdir(audio_class_folder_path)
    # Store the full path of each file and repeat the folder name once per
    # file, so both lists stay index-aligned.
    positive_audio_files.extend(
        os.path.join(audio_class_folder_path, file_name)
        for file_name in audio_class_files
    )
    positive_audio_file_classes.extend([audio_class_folder] * len(audio_class_files))
print("positive_audio_files length: ", len(positive_audio_files))
print("positive_audio_file_classes length: ", len(positive_audio_file_classes))

Processing class folder:  ./audio/positive/Car
Processing class folder:  ./audio/positive/Comm
Processing class folder:  ./audio/positive/Motorcycle
positive_audio_files length:  9362
positive_audio_file_classes length:  9362


In [5]:
# Display the label of the first collected positive file (labels are the class folder names).
positive_audio_file_classes[0]

'Car'

In [6]:
# Collect negative audio files and their classes.
# Each sub-folder of NEGATIVE_PATH is one class, named after the folder.
negative_audio_files = []
negative_audio_file_classes = []
# List the class folders inside the negative audio folder.
audio_class_folders = os.listdir(NEGATIVE_PATH)
for audio_class_folder in audio_class_folders:
    # Assemble the full path of the class folder.
    audio_class_folder_path = os.path.join(NEGATIVE_PATH, audio_class_folder)
    print("Processing class folder: ", audio_class_folder_path)
    # Get all files in the class folder.
    audio_class_files = os.listdir(audio_class_folder_path)
    for audio_class_file in audio_class_files:
        # Assemble the full path of the audio file.
        audio_file_path = os.path.join(audio_class_folder_path, audio_class_file)
        # Keep both lists index-aligned: one path and one label per file.
        negative_audio_files.append(audio_file_path)
        negative_audio_file_classes.append(audio_class_folder)
# Report the true totals. Measuring a [0:10] slice here would print at most 10
# no matter how many files were actually collected.
print("negative_audio_files length: ", len(negative_audio_files))
print("negative_audio_file_classes length: ", len(negative_audio_file_classes))

Processing class folder:  ./audio/negative/background_traffic
negative_audio_files length:  10
negative_audio_file_classes length:  10


In [7]:
# Shuffle every list in place, re-seeding the global NumPy RNG before each
# shuffle so that a file list and its label list (which have the same length)
# are reordered identically and stay aligned.
seed = 42
for list_to_shuffle in (
    positive_audio_files,
    positive_audio_file_classes,
    negative_audio_files,
    negative_audio_file_classes,
):
    np.random.seed(seed)
    np.random.shuffle(list_to_shuffle)

In [8]:
# One-hot encode the labels: class names -> integer ids -> one-hot rows.
label_encoder = LabelEncoder()
positive_class_ids = label_encoder.fit_transform(positive_audio_file_classes)
positive_audio_file_classes_categorical = to_categorical(positive_class_ids)
print("Example of audio_file_classes_categorial: ", positive_audio_file_classes_categorical[:10])

Example of audio_file_classes_categorial:  [[1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]]


In [9]:
# Open the serial connection to the MCU (8 data bits, no parity, 1 stop bit).
print("Configuring serial port...")
serial_settings = {
    # Change this to your actual port, e.g., 'COM3' on Windows, '/dev/ttyS0' on Linux.
    "port": '/dev/ttyACM0',
    "baudrate": 115200,
    "bytesize": serial.EIGHTBITS,
    "parity": serial.PARITY_NONE,
    "stopbits": serial.STOPBITS_ONE,
    # Timeout applied to read operations.
    "timeout": 1,
}
ser = serial.Serial(**serial_settings)

Configuring serial port...


In [10]:
# Confirm that the port is open before any audio is streamed.
port_is_open = ser.is_open
if port_is_open:
    print(f"Serial port {ser.port} opened at {ser.baudrate} baud.")

Serial port /dev/ttyACM0 opened at 115200 baud.


In [11]:
def calc_accuracy(y_true, y_pred, audioFilesProcessed):
    """Return the share of processed audio files that were predicted correctly.

    Args:
        y_true: Expected class indices, index-aligned with ``y_pred``.
        y_pred: Predicted class indices.
        audioFilesProcessed: Number of files processed so far; used as the
            denominator, so files without any prediction count as misses.

    Returns:
        The number of matching (expected, predicted) pairs divided by
        ``audioFilesProcessed``, rounded to 4 decimals. Returns 0.0 when no
        file has been processed yet instead of dividing by zero.
    """
    if audioFilesProcessed <= 0:
        return 0.0
    # zip() stops at the shorter sequence, so a length mismatch between the
    # two lists cannot raise an IndexError.
    success = sum(1 for truth, pred in zip(y_true, y_pred) if pred == truth)
    return round(success / audioFilesProcessed, 4)

In [12]:
# Shared state updated by streamAudioFile():
#   audioSent           - number of samples written to the serial port
#   audioFilesProcessed - number of audio files handled
#   results             - predictions received from the MCU
#   realResults         - expected class index for each received prediction
audioSent = audioFilesProcessed = 0
results, realResults = [], []

In [13]:
# Display the one-hot row of the first positive file as a sanity check.
positive_audio_file_classes_categorical[0]

array([1., 0., 0.])

In [14]:
def _drainSerialResponses(i):
    """Handle every response line currently waiting on the serial port.

    Lines are dispatched on their two-character prefix:
      "e:"        - printed; signals that the current file must be aborted.
      "c:", "s:"  - printed.
      "v:"        - the remainder is parsed as an integer prediction and stored
                    in ``results``; the expected class index of file ``i`` is
                    stored in ``realResults`` at the same position.
    Lines with any other prefix are ignored.

    Args:
        i: Index of the file being streamed; selects its one-hot label row.

    Returns:
        False if an "e:" line was received, True otherwise.
    """
    while ser.in_waiting > 0:
        # errors='replace' keeps a single corrupted byte from aborting the run.
        response = ser.readline().decode('utf-8', errors='replace').strip()
        if response.startswith("e:"):
            print(response)
            return False
        if response.startswith(("c:", "s:")):
            print(response)
        elif response.startswith("v:"):
            try:
                # Strip the two-character prefix to get the predicted class.
                predicted = int(response[2:])
            except ValueError:
                print(f"Error: Malformed prediction: {response}")
                continue
            # The expected class is the argmax over the WHOLE one-hot row of
            # file i (indexing [0][i] would pick one scalar out of row 0).
            realResults.append(int(np.argmax(positive_audio_file_classes_categorical[i])))
            results.append(predicted)
    return True


def streamAudioFile(file_path, i):
    """Stream one .wav file to the MCU sample by sample and print the accuracy.

    The file is loaded with librosa, resampled to 16 kHz when necessary and
    written to the serial port as one native 4-byte float per sample. Pending
    MCU responses are handled every 4000 samples sent and once more after the
    last sample.

    Args:
        file_path: Path of the audio file; must end with '.wav'.
        i: Index of the file in the positive file list; used to look up its
            expected class.

    Returns:
        None. Returns early, without printing an accuracy, when the file is
        not a .wav file, exceeds the transfer limit, or the MCU sends an
        "e:" response.
    """
    global audioSent
    global audioFilesProcessed
    maxTransfers = 20 * 4000
    targetSampleRate = 16000
    # Check file format.
    if not file_path.endswith('.wav'):
        print("Error: Only .wav files are supported.")
        return
    # Count only files that are actually streamed, so rejected files do not
    # inflate the accuracy denominator.
    audioFilesProcessed += 1
    # Load the audio file at its native sample rate.
    audio_data, sr = librosa.load(file_path, sr=None)
    if sr != targetSampleRate:
        audio_data = librosa.resample(y=audio_data, orig_sr=sr, target_sr=targetSampleRate)
    audio_data = np.array(audio_data, dtype=np.float32)
    # Send the audio data to the MCU.
    for j, sample in enumerate(audio_data):
        # Allow exactly maxTransfers samples per file.
        if j >= maxTransfers:
            print("Error: Too many transfers. Next file.")
            return
        ser.write(struct.pack('f', sample))
        audioSent += 1
        # Poll once per 4000 samples sent (counted across files).
        if audioSent % 4000 == 0:
            if not _drainSerialResponses(i):
                return
    # Pick up whatever the MCU has answered by the end of the file.
    # NOTE(review): responses arriving after this point are read while the
    # next file is streamed and get that file's label - confirm against the
    # MCU firmware timing.
    if not _drainSerialResponses(i):
        return
    print(f"Accuracy: {calc_accuracy(realResults, results, audioFilesProcessed)}")

In [15]:

total_positive_audio_files = len(positive_audio_files)
negative_audio_file_pointer = 0

# Stream every positive audio file to the MCU, one after another.
for i, audio_file in enumerate(positive_audio_files):
    # Report which file and class is being processed.
    print("Processing audio file: ", audio_file)
    print("Processing audio file class: ", positive_audio_file_classes[i])
    streamAudioFile(audio_file, i)
    # Streaming of a negative file after each positive file is currently disabled:
    #print("Processing negative audio file: ", negative_audio_files[negative_audio_file_pointer])
    #print("Processing negative audio file class: ", negative_audio_file_classes[negative_audio_file_pointer])
    #streamAudioFile(negative_audio_files[negative_audio_file_pointer], i)
    # Advance the negative file pointer, wrapping to 0 past the end of the list.
    next_negative_pointer = negative_audio_file_pointer + 1
    negative_audio_file_pointer = (
        next_negative_pointer if next_negative_pointer < len(negative_audio_files) else 0
    )

Processing audio file:  ./audio/positive/Car/2019-11-18-07-25_Langewiesener-Strasse_50Kmh_52813_M_D_CR_SE_CH34.wav
Processing audio file class:  Car
c: [0,1,0,0]
c: [0,1,0,1]
c: [0,1,1,1]
c: [0,1,2,1]
c: [0,1,3,1]
c: [0,2,3,1]
c: [0,2,3,2]
Accuracy: 0.0
Processing audio file:  ./audio/positive/Car/2019-10-22-15-30_Fraunhofer-IDMT_30Kmh_3289858_A_D_CL_ME_CH12.wav
Processing audio file class:  Car
c: [0,2,4,2]
c: [0,2,4,3]
c: [0,3,4,3]
c: [0,3,5,3]
c: [0,4,5,3]
c: [0,5,5,3]
c: [0,6,5,3]
c: [0,6,6,3]
Accuracy: 0.0
Processing audio file:  ./audio/positive/Car/2019-11-19-15-25_Langewiesener-Strasse_50Kmh_3503746_A_D_CR_SE_CH34.wav
Processing audio file class:  Car
c: [0,6,6,4]
c: [0,7,6,4]
c: [0,7,6,5]
c: [0,7,7,5]
c: [0,7,8,5]
c: [0,7,8,6]
c: [0,8,8,6]
c: [0,8,9,6]
Accuracy: 0.0
Processing audio file:  ./audio/positive/Car/2019-10-22-15-30_Fraunhofer-IDMT_30Kmh_3508408_A_D_CL_ME_CH12.wav
Processing audio file class:  Car
c: [0,8,10,6]
c: [0,8,10,7]
c: [0,8,10,8]
c: [0,8,10,9]
c: [0,9,10,9]

KeyboardInterrupt: 

In [None]:
# Close the serial port once streaming has finished.
ser.close()