# Test Audio Sender

This file is used for sending audio data to the device. It measures the accuracy based on individual audio files.

**Note that the software on the MCU must match this!**

In [108]:
AUDIO_FOLDER_PATH = r"C:\Users\Pontu\OneDrive\Skrivbord\lib3\repo\dataset-converter\dataset_mix"
POSITIVE_PATH = f"{AUDIO_FOLDER_PATH}/positive"
NEGATIVE_PATH = f"{AUDIO_FOLDER_PATH}/negative"

In [109]:
import numpy as np
import pandas as pd
import gc
import os
import librosa
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import serial
import struct

In [110]:
# Collecte positive audio files and their classes
positive_audio_files = []
positive_audio_file_classes = []
# Look each file in the audio folder
audio_class_folders = os.listdir(POSITIVE_PATH)
# Loop each class folder
for audio_class_folder in audio_class_folders:
    # Assemble full audio class folder path.
    audio_class_folder_path = os.path.join(POSITIVE_PATH, audio_class_folder)
    print("Processing class folder: ", audio_class_folder_path)
    # Get all files in the audio class folder
    audio_class_files = os.listdir(audio_class_folder_path)
    # Loop each audio file in the audio class folder
    for audio_class_file in audio_class_files:
        # Assemble full audio file path.
        audio_file_path = os.path.join(audio_class_folder_path, audio_class_file)
        # Append the audio file path to the positive_audio_files list
        positive_audio_files.append(audio_file_path)
        # Append the audio class to the audio_file_classes list
        positive_audio_file_classes.append(audio_class_folder)
print("positive_audio_files length: ", len(positive_audio_files))
print("positive_audio_file_classes length: ", len(positive_audio_file_classes))

Processing class folder:  C:\Users\Pontu\OneDrive\Skrivbord\lib3\repo\dataset-converter\dataset_mix/positive\Car
Processing class folder:  C:\Users\Pontu\OneDrive\Skrivbord\lib3\repo\dataset-converter\dataset_mix/positive\Comm
Processing class folder:  C:\Users\Pontu\OneDrive\Skrivbord\lib3\repo\dataset-converter\dataset_mix/positive\Motorcycle
positive_audio_files length:  18597
positive_audio_file_classes length:  18597


In [111]:
positive_audio_file_classes[0]

'Car'

In [112]:
# Collecte negative audio files and their classes
negative_audio_files = []
negative_audio_file_classes = []
# Look each file in the audio folder
audio_class_folders = os.listdir(NEGATIVE_PATH)
# Loop each class folder
for audio_class_folder in audio_class_folders:
    # Assemble full audio class folder path.
    audio_class_folder_path = os.path.join(NEGATIVE_PATH, audio_class_folder)
    print("Processing class folder: ", audio_class_folder_path)
    # Get all files in the audio class folder
    audio_class_files = os.listdir(audio_class_folder_path)
    # Loop each audio file in the audio class folder
    for audio_class_file in audio_class_files:
        # Assemble full audio file path.
        audio_file_path = os.path.join(audio_class_folder_path, audio_class_file)
        # Append the audio file path to the negative_audio_files list
        negative_audio_files.append(audio_file_path)
        # Append the audio class to the audio_file_classes list
        negative_audio_file_classes.append(audio_class_folder)
print("negative_audio_files length: ", len(negative_audio_files[0:10]))
print("negative_audio_file_classes length: ", len(negative_audio_file_classes[0:10]))

Processing class folder:  C:\Users\Pontu\OneDrive\Skrivbord\lib3\repo\dataset-converter\dataset_mix/negative\background
negative_audio_files length:  10
negative_audio_file_classes length:  10


In [113]:
# Shuffle the audio files and classes with the same seed.
seed = 42
np.random.seed(seed)
np.random.shuffle(positive_audio_files)

np.random.seed(seed)
np.random.shuffle(positive_audio_file_classes)

np.random.seed(seed)
np.random.shuffle(negative_audio_files)

In [114]:
# Hot end code the labels.
label_encoder = LabelEncoder()
positive_audio_file_classes_categorical = to_categorical(label_encoder.fit_transform(positive_audio_file_classes))
print("Example of audio_file_classes_categorial: ", positive_audio_file_classes_categorical[0:10])

Example of audio_file_classes_categorial:  [[0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [1. 0. 0.]]


In [115]:
# Connect to MCU via serial.
print("Configuring serial port...")
ser = serial.Serial(
    port='COM5',  # Change this to your actual port, e.g., 'COM3' on Windows, '/dev/ttyS0' on Linux
    baudrate=921600,       # Set baud rate to 921600
    bytesize=serial.EIGHTBITS,
    parity=serial.PARITY_NONE,
    stopbits=serial.STOPBITS_ONE,
    timeout=0.1,           # Set timeout for reading
    write_timeout=None     # Wait indefinitely until all data is sent
)

Configuring serial port...


In [116]:
if ser.is_open:
    print(f"Serial port {ser.port} opened at {ser.baudrate} baud.")

Serial port COM5 opened at 921600 baud.


In [None]:
def calc_accuracy(y_true, y_pred):
    success = 0
    for i in range(len(y_pred)):
        if y_pred[i] == y_true[i]:
            success += 1
    res = round(success / len(y_pred), 3)
    return res

import numpy as np

def confusion_matrix_manual(y_true, y_pred):
    labels = sorted(set(y_true + y_pred))  # All unique labels
    matrix = {label: {l: 0 for l in labels} for label in labels}

    for real, pred in zip(y_true, y_pred):
        matrix[real][pred] += 1

    return matrix


def print_confusion_matrix(cm):
    labels = list(cm.keys())
    print("    " + " ".join(f"{l:>4}" for l in labels))
    for real in labels:
        row = " ".join(f"{cm[real][pred]:>4}" for pred in labels)
        print(f"{real:>4} {row}")

In [118]:
audioSent = 0
audioFilesProcessed = 0
results = []
realResults = []

In [119]:
positive_audio_file_classes_categorical[0]

array([0., 1., 0.])

In [None]:
def streamAudioFile(file_path, realClass, i):
    global audioSent
    global results
    global realResults
    global audioBuffer
    voted = False
    # Append the real result to the results list.
    realResults.append(np.argmax(realClass))
    #print(f"Adding realResults: {realResults}")
    # Check file format.
    if not file_path.endswith('.wav'):
        print("Error: Only .wav files are supported.")
        return
    # Load the audio file.
    audio_data, sr = librosa.load(file_path, sr=None)
    # Convert sample rate to 16kHz.
    if sr != 16000:
        audio_data = librosa.resample(y=audio_data, orig_sr=sr, target_sr=16000)
        sr = 16000
    # Convert the audio data to a numpy array.
    audio_data = np.array(audio_data, dtype=np.float32)
    # Send the audio data to the MCU.
    #for j in range(len(audio_data)):
    #    data_to_send = audio_data[j].tobytes()
    ser.write(audio_data.tobytes())

    while ser.in_waiting > 0:
        response = ser.readline().decode('utf-8').strip()
        if response.startswith("fin:"):
            matrix = confusion_matrix_manual(realResults, results)
            print_confusion_matrix(matrix)
        if response.startswith("e:"):
            print(response)
            return
        if response.startswith("c:"):
            print(response)
        if response.startswith("s:"):
            print(response)
        if response.startswith("v:"):
            voted = True
            print(response)
            #print(f"Adding {int(response[2:])} to results")
            response = int(response[2:])
            results.append(response)
            if len(results) > len(realResults):
                realResults.append(9)
            print(f"Accuracy: {calc_accuracy(realResults, results)}")
    # If result is shorter than realResults, append 9 to results.
    if i > 0 and len(results) < len(realResults) and not voted:
        results.append(9)
        print(f"Accuracy: {calc_accuracy(realResults, results)}")

    while len(results) < len(realResults):
        results.append(9)  # 9 = "no vote" or "unknown"


In [121]:

total_positive_audio_files = len(positive_audio_files)

# Loop each audio file.
negative_audio_file_pointer = 0
for i in range(total_positive_audio_files):
    # Get audio file path.
    audio_file = positive_audio_files[i]
    # Print audio file path and class.
    print("Processing audio file: ", audio_file)
    print("Processing audio file class: ", positive_audio_file_classes[i])
    # Stream positive audio file to MCU.
    streamAudioFile(audio_file, positive_audio_file_classes_categorical[i], i)
    # Print nagtive audio file path and class.
    print("Processing negative audio file: ", negative_audio_files[negative_audio_file_pointer])
    # Stream negative audio file to MCU.
    streamAudioFile(negative_audio_files[negative_audio_file_pointer], [0,0,0,1], i)
    # Increment the negative audio file pointer.
    negative_audio_file_pointer = (negative_audio_file_pointer + 1) % len(negative_audio_files)

Processing audio file:  C:\Users\Pontu\OneDrive\Skrivbord\lib3\repo\dataset-converter\dataset_mix/positive\Comm\2019-11-19-15-25_Langewiesener-Strasse_50Kmh_2223666_A_D_TL_SE_CH34.wav
Processing audio file class:  Comm
s:Heap: 1100 / 470492 bytes
s:Tensor arena size: 16160/37000 bytes
s:Input tensor shape: 1, 16, 8, 1
s:Output tensor shape: 1, 4
c:COUNT --------------------------------------------- 0
c:COUNT --------------------------------------------- 1
c:COUNT --------------------------------------------- 2
c:COUNT --------------------------------------------- 3
c:COUNT --------------------------------------------- 4
c:COUNT --------------------------------------------- 5
c:COUNT --------------------------------------------- 6
Processing negative audio file:  C:\Users\Pontu\OneDrive\Skrivbord\lib3\repo\dataset-converter\dataset_mix/negative\background\2019-11-13-08-00_Schleusinger-Allee_70Kmh_13716480_ME_CH12-BG.wav
c:COUNT --------------------------------------------- 7
c:COUNT ---

KeyboardInterrupt: 

In [None]:
# Close the serial port
ser.close()