In [15]:
import os
import librosa
import numpy as np
import pandas as pd
import soundfile as sf

In [27]:
# File paths for the audio files and corresponding CSV annotations
audio_files = ['../data/Development_set/Development_Set/Training_Set/MT/dcase_MK1.wav', '../data/Development_set/Development_Set/Training_Set/MT/dcase_MK2.wav']
csv_files = ['../data/Development_set/Development_Set/Training_Set/MT/dcase_MK1.csv', '../data/Development_set/Development_Set/Training_Set/MT/dcase_MK2.csv']

# Output directory for AFE segments
output_dir = 'processed_segments'
os.makedirs(output_dir, exist_ok=True)

In [28]:
# Adapted from Esther's preprocessing code for audio segmentation from processed_segments.py
# Target sampling rate
sr = 8000

# Iterate through each audio file and its corresponding CSV annotation file
for audio_file, csv_file in zip(audio_files, csv_files):
    # Load the audio file
    y, sr = librosa.load(audio_file, sr=sr)

    # Load the CSV annotations
    annotations = pd.read_csv(csv_file)

    # Iterate over each row in the CSV to extract annotation details
    for i, annotation in annotations.iterrows():
        # Skip the segment if any of the annotation columns contain 'UNK'
        if 'UNK' in [annotation['SNMK'], annotation['CCMK'], annotation['AGGM'], annotation['SOCM']]:
            continue

        start_time = annotation['Starttime']
        end_time = annotation['Endtime']

        # Calculate the event center time
        event_center = (start_time + end_time) / 2

        # Calculate the segment start and end times to ensure a 1-second duration
        segment_start = max(0, event_center - 0.5)  # Ensure the start time is not negative
        segment_end = segment_start + 1.0  # End time is 1 second after the start time

        # Extract the segment from the audio file
        start_sample = int(segment_start * sr)
        end_sample = int(segment_end * sr)
        segment = y[start_sample:end_sample]

        # If the segment is less than 1 second, pad it with zeros
        if len(segment) < sr:
            segment = np.pad(segment, (0, sr - len(segment)), 'constant')

        # Determine the label based on the annotation
        if annotation['SNMK'] == 'POS':
            label = 1  # SNMK
        elif annotation['CCMK'] == 'POS':
            label = 2  # CCMK
        elif annotation['AGGM'] == 'POS':
            label = 3  # AGGM
        elif annotation['SOCM'] == 'POS':
            label = 4  # SOCM
        else:
            label = 7  # unknown

        # Create the output filename based on the audio file name, label, and segment index
        output_filename = os.path.join(output_dir, f'{os.path.basename(audio_file).replace(".wav", "")}_segment_{i+1}_label_{label}.wav')

        # Save the segment as a WAV file using the soundfile library
        sf.write(output_filename, segment, sr)

    # Process background noise segments
    no_event_segments = []
    for i in range(len(y) // sr):
        segment_start = i * sr
        segment_end = segment_start + sr
        segment = y[segment_start:segment_end]

        # Check if the segment overlaps with any event
        overlaps = any(start_sample <= segment_start <= end_sample or start_sample <= segment_end <= end_sample
                       for start_sample, end_sample in zip(annotations['Starttime'] * sr, annotations['Endtime'] * sr))

        if not overlaps:
            no_event_segments.append(segment)

    # Randomly select the same number of no-event segments as there are event segments
    np.random.shuffle(no_event_segments)
    selected_no_event_segments = no_event_segments[:len(annotations)]

    # Save the no-event segments with label 0 (background noise)
    for i, segment in enumerate(selected_no_event_segments):
        output_filename = os.path.join(output_dir, f'segment_{os.path.basename(audio_file).replace(".wav", "")}_{i+1}_label_0.wav')
        sf.write(output_filename, segment, sr)

In [4]:
# - Switch off warnings
import warnings

warnings.filterwarnings("ignore")

# - Import numpy
import numpy as np
import scipy as sp

# - Import the plotting library
import sys
!{sys.executable} -m pip install --quiet matplotlib
import matplotlib.pyplot as plt

%matplotlib inline
plt.rcParams["figure.figsize"] = [16, 10]
plt.rcParams["figure.dpi"] = 300

# - Rich printing
try:
    from rich import print
except:
    pass

In [5]:
# - import AFE
from rockpool.devices.xylo.syns61201 import AFESim

In [6]:
# - AFE parameters

fs = 8e3                          # The sampling frequency of the input, in Hz
raster_period = 10e-3               # The output rasterisation time-step in seconds
max_spike_per_raster_period = 15    # Maximum number of events per output time-step

add_noise = False                    # Enables / disables simulated noise generated by the AFE
add_offset = False                 # Add mismatch offset to each filter
add_mismatch = False                 # Add simualted mismatch to filter parameters
seed = None                         # Seed for mistmatch generation

# - Initialize the AFE simulation, and convert it to a high-level `TimedModule`

afe = AFESim(
        fs = fs,
        raster_period = raster_period,
        max_spike_per_raster_period = max_spike_per_raster_period,
        add_noise = add_noise,
        add_offset = add_offset,
        add_mismatch = add_mismatch,
        seed = seed,
).timed()

ValueError: Sampling frequency (8000.0) must be at least 6 times the highest BPF centre freq. (i.e. >101640 Hz)
                The main reason is that the microphone produces THD (third-order distortion) which may fallback into the wrong frequency
                if the sampling frequency is not large enough.
                