# ⚙️ Setup

In [None]:
# Do imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

import numpy as np
import librosa
from librosa.effects import pitch_shift
import soundfile as sf
import librosa.display
import IPython.display as ipd

In [None]:
# Root of the competition data and the folder holding the training spectrograms.
comp_path = '/kaggle/input/hms-harmful-brain-activity-classification'
train_spect_dir = '/'.join([comp_path, 'train_spectrograms'])

# os.scandir yields entries in arbitrary, filesystem-dependent order. Sort the
# paths so that indexing into the list by position selects the same file on
# every run; the context manager closes the directory handle when done.
with os.scandir(train_spect_dir) as entries:
    train_spect_path_list = sorted(entry.path for entry in entries)

# 🔊 Listen to Spectrogram
Each spectrogram represents frequencies over time. These are acquired by performing a short-time Fourier transform (STFT), which breaks a signal down into its frequency components over short windows of time.

We can perform the inverse of this operation (ISTFT) to merge these frequencies back into the original signal. Then we can listen to that signal to complement the data we view.

The actual frequencies in the data are much lower than the human ear can hear, so they've all been boosted to hopefully allow us humans to hear the full frequency spectrum of the data in our audible range.



In [None]:
def play_spectrogram(spectrogram_path, measurement):
    """Convert one region of a spectrogram file into a playable audio clip.

    The parquet file at ``spectrogram_path`` is loaded, the columns whose names
    start with ``measurement`` are selected, and the resulting
    (frequency x time) matrix is inverted with ``librosa.istft``. The signal is
    then pitch-shifted, written to
    ``/kaggle/working/reconstructed_audio.wav`` and wrapped in an
    ``IPython.display.Audio`` object.

    Args:
        spectrogram_path: Path of the spectrogram parquet file to read.
        measurement: Column-name prefix to use; one of "LL", "RL", "RP", "LP".

    Returns:
        An ``IPython.display.Audio`` for the written wav file, or ``None``
        (after printing a message) when ``measurement`` is not a known option.
    """
    sample_spect = pd.read_parquet(spectrogram_path)

    split_spect = {
        "LL": sample_spect.filter(regex='^LL', axis=1),
        "RL": sample_spect.filter(regex='^RL', axis=1),
        "RP": sample_spect.filter(regex='^RP', axis=1),
        "LP": sample_spect.filter(regex='^LP', axis=1),
    }

    if measurement not in split_spect.keys():
        print(f"Requested measurement '{measurement}' not in options {split_spect.keys()}")
        return

    region_spect = split_spect[measurement]
    # Drop the first three characters of each column name to get the frequency
    # label (presumably names look like "LL_0.59" - verify against the data).
    frequencies = [column_name[3:] for column_name in region_spect.columns]

    # Transpose so rows are frequency bins and columns are time steps.
    frequency_data = region_spect.T.to_numpy(dtype=np.float32)
    # Missing cells would otherwise propagate as NaN through the inverse
    # transform and leave the whole clip non-finite; treat them as silence.
    frequency_data = np.nan_to_num(frequency_data, nan=0.0, posinf=0.0, neginf=0.0)

    sr = 3000  # Playback sample rate in Hz; adjust as needed
    pitch_shift_semitones = 5  # passed to pitch_shift as n_steps
    target_frequency = 1000

    # librosa.istft treats the rows as the bins of a one-sided spectrum, i.e.
    # evenly spaced from 0 Hz up to sr / 2 inclusive (n_fft = 2 * (rows - 1)).
    n_bins = frequency_data.shape[0]
    bin_frequencies = np.linspace(0, sr / 2, n_bins)
    closest_frequency_row = int(np.argmin(np.abs(bin_frequencies - target_frequency)))

    # n_steps is measured in semitones (12 per octave by default), so the
    # frequency is scaled by 2 ** (steps / 12) rather than by the step count.
    shifted_frequency = target_frequency * 2 ** (pitch_shift_semitones / 12)
    print(f"Row {closest_frequency_row} is closest to {target_frequency} Hz. When pitch-shifted this is {shifted_frequency:.0f} Hz. Row {closest_frequency_row} is actually {frequencies[closest_frequency_row]} Hz in the data.")

    audio_reconstructed = librosa.istft(frequency_data)
    audio_reconstructed_shifted = pitch_shift(y=audio_reconstructed, sr=sr, n_steps=pitch_shift_semitones)

    sf.write('/kaggle/working/reconstructed_audio.wav', audio_reconstructed_shifted, sr)
    return ipd.Audio('/kaggle/working/reconstructed_audio.wav')

def plot_spectrogram(spectrogram_path, measurement):
    """Plot the log-scaled spectrogram of one region as an image.

    The parquet file at ``spectrogram_path`` is loaded, the columns whose names
    start with ``measurement`` are selected, and the natural log of their
    values is drawn with time on the x-axis and one row per column on the
    y-axis. The figure is shown with ``plt.show()``.

    Args:
        spectrogram_path: Path of the spectrogram parquet file to read.
        measurement: Column-name prefix to use; one of "LL", "RL", "RP", "LP".

    Returns:
        None. A message is printed and nothing is drawn when ``measurement``
        is not a known option.
    """
    sample_spect = pd.read_parquet(spectrogram_path)

    split_spect = {
        "LL": sample_spect.filter(regex='^LL', axis=1),
        "RL": sample_spect.filter(regex='^RL', axis=1),
        "RP": sample_spect.filter(regex='^RP', axis=1),
        "LP": sample_spect.filter(regex='^LP', axis=1),
    }

    if measurement not in split_spect.keys():
        print(f"Requested measurement '{measurement}' not in options {split_spect.keys()}")
        return

    region_spect = split_spect[measurement]
    # Drop the first three characters of each column name to get the tick
    # label (presumably names look like "LL_0.59" - verify against the data).
    frequencies = [column_name[3:] for column_name in region_spect.columns]
    label_interval = 5  # label every 5th row to keep the axis readable

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 6))
    # Transpose so each column of the file becomes a row of the image;
    # origin='lower' puts the first column at the bottom.
    img = ax.imshow(np.log(region_spect).T, cmap='viridis', aspect='auto', origin='lower')  # You can choose any colormap (cmap) that suits your preferences
    cbar = fig.colorbar(img, ax=ax)
    cbar.set_label('Log(Value)')
    ax.set_title(measurement)
    ax.set_ylabel("Frequency (Hz)")
    ax.set_xlabel("Time")

    # Set the ticks once, at the sparse positions actually displayed.
    ax.set_yticks(np.arange(0, len(frequencies), label_interval))
    ax.set_yticklabels(frequencies[::label_interval])
    plt.tight_layout()
    plt.show()

def plot_and_play_spectrogram(spectrogram_path, measurement):
    """Plot a spectrogram region, then build its audio rendition.

    Both arguments are forwarded unchanged to ``plot_spectrogram`` and then to
    ``play_spectrogram``; whatever ``play_spectrogram`` returns is handed back
    to the caller.
    """
    # Plot first, then build the audio from the same file and region.
    plot_spectrogram(spectrogram_path, measurement)
    audio_player = play_spectrogram(spectrogram_path, measurement)
    return audio_player

In [None]:
# Plot and play the "LL" columns of the file at index 35 of train_spect_path_list.
plot_and_play_spectrogram(
    spectrogram_path=train_spect_path_list[35],
    measurement="LL",
)