In [None]:
from pathlib import Path

import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from reservoirpy.datasets import mackey_glass
from reservoirpy.nodes import Reservoir, Ridge
from reservoirpy.nodes import Reservoir, Ridge, FORCE, ESN
from reservoirpy.observables import rmse, rsquare
from scipy import signal
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [None]:
def load_and_preprocess_data(file_paths, target_length):
    """Load audio files, force them to a common length and band-pass filter them.

    Parameters
    ----------
    file_paths : iterable of path-like
        Audio files to read, in the order the rows should appear in the result.
    target_length : int
        Number of samples every clip is padded or cut to.

    Returns
    -------
    numpy.ndarray
        ``np.array`` built from the list of processed clips, one entry per file.

    Notes
    -----
    Files are read with ``sr=None`` (no resampling on load), and
    ``target_length`` is a sample count, so clips recorded at different rates
    would span different durations.
    """
    clips = []
    progress = tqdm(file_paths, desc="Loading and preprocessing data", unit="file")
    for path in progress:
        waveform, native_sr = librosa.load(path, sr=None)

        # A positive deficit means the clip is too short: append trailing
        # zeros. Otherwise keep only the first `target_length` samples.
        deficit = target_length - len(waveform)
        if deficit > 0:
            waveform = np.pad(waveform, (0, deficit))
        else:
            waveform = waveform[:target_length]

        # Filtering happens after the length fix, using the file's own rate.
        clips.append(bandpass_filter(waveform, native_sr))

    print("Done")
    return np.array(clips)

def bandpass_filter(audio, sr, lowcut=5000, highcut=100000, order=6):
    """Apply a forward-backward Butterworth band-pass filter to ``audio``.

    Parameters
    ----------
    audio : array-like
        Signal to filter; ``sosfiltfilt`` is called with its default axis.
    sr : float
        Sampling rate of ``audio`` in Hz.
    lowcut, highcut : float, optional
        Lower and upper band edges in Hz. The defaults (5 kHz and 100 kHz)
        reproduce the previously hard-coded band.
    order : int, optional
        Order passed to ``scipy.signal.butter`` (default 6, as before).

    Returns
    -------
    numpy.ndarray
        The filtered signal, same shape as ``audio``.

    Raises
    ------
    ValueError
        If the band edges do not satisfy ``0 < lowcut < highcut < sr/2``.
        With the default band this means ``sr`` must exceed 200 kHz.
    """
    nyquist = sr / 2
    # Fail early with an actionable message instead of SciPy's generic
    # "critical frequencies" error when a file has a lower sampling rate.
    if not 0 < lowcut < highcut < nyquist:
        raise ValueError(
            "Band edges must satisfy 0 < lowcut < highcut < sr/2; "
            f"got lowcut={lowcut}, highcut={highcut}, sr={sr}."
        )

    # Second-order sections are used for the filter coefficients.
    sos = signal.butter(order, [lowcut, highcut], btype="bandpass", fs=sr, output="sos")
    return signal.sosfiltfilt(sos, audio)

In [None]:
#! ===== Set parameters ======
# Audio parameters: multiplied together later to get the clip length in samples.
sample_rate = 256000
audio_duration_seconds = 0.2

# NOTE: despite its name, `parents[0]` is the directory directly above the
# current working directory (the immediate parent, not the grandparent).
grandparent_dir = Path.cwd().parents[0]
print(grandparent_dir)

# Root folder of the downloaded data; the split folders live beneath it.
download_path = grandparent_dir / ".dataset"
train_directory = download_path / "X_train"
test_directory = download_path / "X_test"

In [None]:
#! ====== Load and preprocess data ====== 
# Read the labels file; the raw frame is kept untouched so this cell can be
# re-run without depending on earlier in-place edits.
labels_file = download_path / "Y_train_ofTdMHi.csv"
labels_raw = pd.read_csv(labels_file)

# Build one Path per recording from its file name. Mapping element-wise avoids
# relying on reflected `Path / Series` arithmetic, which only works while the
# "id" column is stored with object dtype.
train_audio_dir = Path(download_path) / "X_train"
df = (
    labels_raw
    .assign(relative_path=labels_raw["id"].map(lambda file_name: train_audio_dir / file_name))
    .rename(columns={"pos_label": "label"})
    # Keep only the two columns used downstream ("id" is replaced by the path).
    [["relative_path", "label"]]
)
print(f"### There are {len(df)} audio files in the dataset.")

# Count each class once; `.get(..., 0)` reports 0 instead of raising KeyError
# when a class is absent from the file.
label_counts = df["label"].value_counts()
table = f"""
Here is the split into good and bad signals:
| Label   | Count   |
|:-------:|:-------:|
| 0       | {label_counts.get(0, 0):7} |
| 1       | {label_counts.get(1, 0):7} |"""
print(table, end="\n\n")
print("### Here is a sample of the data:")
# Cap the sample size so a label file with fewer than five rows still displays.
print(df.sample(min(5, len(df))))


In [None]:
# Clip length in samples. round() guards against float products that land just
# below an integer (plain int() would truncate them down by one sample).
target_length = int(round(sample_rate * audio_duration_seconds))
X = load_and_preprocess_data(df["relative_path"], target_length)

y = df["label"].values.astype(int)

# 100-unit reservoir. Here `sr` is the reservoir's spectral radius (not an
# audio sampling rate) and `lr` its leak rate. The seed fixes the random
# weight initialisation so that Restart & Run All reproduces the same states.
reservoir = Reservoir(100, lr=0.5, sr=0.9, seed=42)

# NOTE(review): X is 2-D (one row per file, one column per sample). ReservoirPy
# appears to read a 2-D array as (timesteps, features), which would make each
# file a single timestep of one long sequence rather than its own time series.
# Confirm this layout is intended.
states = reservoir.run(X)

In [None]:
# Shape of the full input array, then of a single row, one per line.
print(X.shape, X[0].shape, sep="\n")

In [None]:
# Shape of the activations returned by `reservoir.run(X)`.
print(f"{states.shape}")

In [None]:
# train test split
# Hold out 20 % of the rows for evaluation; the fixed random_state keeps the
# partition identical across runs. `train_test_split` is imported with the
# other dependencies at the top of the notebook.
# NOTE(review): the split is not stratified; consider `stratify=y` if the two
# classes are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Run the training rows through the reservoir; `reset=True` is passed so the
# run does not continue from whatever state an earlier call left behind.
# NOTE(review): X_train is 2-D, so rows are presumably treated as consecutive
# timesteps of one sequence. Confirm against the ReservoirPy documentation.
train_states = reservoir.run(X_train, reset=True)

# Ridge readout to be fitted on those activations (regularisation 1e-7).
readout = Ridge(ridge=1e-7)

In [None]:
# Fit the readout on the training activations. The labels are passed as an
# explicit (n_samples, 1) column so that the target has one row per row of
# `train_states`, rather than leaving a 1-D vector to be reshaped implicitly.
# NOTE(review): `warmup=10` discards the first 10 rows before fitting. Those
# rows are whole recordings here, not transient timesteps; confirm that
# dropping them is intended.
readout = readout.fit(train_states, np.asarray(y_train).reshape(-1, 1), warmup=10)