<a href="https://colab.research.google.com/github/varshacvenkat-web/Varsha-Venkatapathy-Engineering-Portfolio-/blob/main/article_4_preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import torch

# ---- Configuration ----
# Sample rate (Hz) passed to librosa.load, so every file is resampled to it.
sampling_rate = 16000

# STFT framing in samples, derived from the durations in milliseconds:
# 32 ms -> 512 samples and 8 ms -> 128 samples at 16 kHz.
frame_length = sampling_rate * 32 // 1000
frame_shift = sampling_rate * 8 // 1000

# Signal-to-noise ratios (dB) at which each clean file is mixed with noise.
snr_levels = [-5, 0, 5]

# Input directories (placeholders to be replaced with real locations).
clean_data_path = "/path/to/clean/data/"  # e.g. LibriSpeech clean speech
noise_data_path = "/path/to/noise/data/"  # e.g. a folder of noise recordings

# Log-Power Spectrum (LPS) feature extraction
def compute_lps(signal, n_fft=512, hop_length=128):
    """Return the log-power spectrum of *signal* together with its STFT.

    Args:
        signal: Time-domain audio samples handed to ``librosa.stft``.
        n_fft: FFT size used for the STFT.
        hop_length: Hop between successive frames, in samples.

    Returns:
        A ``(lps, stft)`` tuple: ``lps`` is ``log10`` of the squared STFT
        magnitude (floored at 1e-8), and ``stft`` is the complex STFT the
        features were computed from.
    """
    spectrum = librosa.stft(signal, n_fft=n_fft, hop_length=hop_length, window='hann')
    power = np.abs(spectrum) ** 2
    # Floor the power before the logarithm so silent bins do not give -inf.
    floored_power = np.maximum(power, 1e-8)
    return np.log10(floored_power), spectrum

# Function to add noise at a specified SNR
def add_noise(clean_signal, noise_signal, snr):
    """Mix *noise_signal* into *clean_signal* at the requested SNR.

    The noise is rescaled so that the ratio of the clean RMS to the scaled
    noise RMS equals ``10 ** (snr / 20)``, then added to the clean signal.

    Args:
        clean_signal: NumPy array holding the clean samples.
        noise_signal: NumPy array of noise samples; it is added element-wise
            to ``clean_signal``, so the two must be broadcast-compatible.
        snr: Target signal-to-noise ratio in dB.

    Returns:
        The noisy mixture ``clean_signal + gain * noise_signal``. When the
        noise is entirely silent (zero RMS) no gain can reach the target
        SNR, so the noise contributes nothing and the result equals the
        clean signal instead of being filled with NaN.
    """
    clean_rms = np.sqrt(np.mean(clean_signal ** 2))
    noise_rms = np.sqrt(np.mean(noise_signal ** 2))

    if noise_rms > 0:
        desired_noise_rms = clean_rms / (10 ** (snr / 20))
        gain = desired_noise_rms / noise_rms
    else:
        # Silent noise: dividing by a zero RMS would produce inf/NaN samples.
        gain = 0.0

    return clean_signal + noise_signal * gain

# Ideal Ratio Mask (IRM) training target
def generate_irm(clean_stft, noisy_stft):
    """Return the ratio mask between a clean STFT and a noisy STFT.

    The mask is the element-wise magnitude ratio ``|clean| / |noisy|``, with
    a small constant added to the denominator, capped at 1.

    Args:
        clean_stft: Complex STFT of the clean signal.
        noisy_stft: Complex STFT of the noisy mixture; combined element-wise
            with ``clean_stft``.

    Returns:
        Array of mask values in ``[0, 1]``.
    """
    clean_magnitude = np.abs(clean_stft)
    noisy_magnitude = np.abs(noisy_stft)
    # The 1e-8 term keeps the division defined where the noisy bin is zero.
    ratio = clean_magnitude / (noisy_magnitude + 1e-8)
    # Magnitudes are non-negative, so only the upper bound needs enforcing.
    return np.minimum(ratio, 1)

# Preprocessing Pipeline
def preprocess_data(clean_data_path, noise_data_path, snr_levels):
    """Build noisy LPS features and ratio-mask targets for every clean file.

    Each entry of ``clean_data_path`` is loaded at the module-level
    ``sampling_rate``, paired with one randomly chosen entry of
    ``noise_data_path``, and mixed at every SNR in ``snr_levels``. For each
    mixture the normalized noisy log-power spectrum and the ratio mask are
    written to ``preprocessed_snr{snr}_{stem}.npz`` in the current working
    directory, where ``stem`` is the clean file name without its extension.

    Args:
        clean_data_path: Directory whose entries are loaded as clean speech.
        noise_data_path: Directory whose entries are loaded as noise.
        snr_levels: Iterable of SNR values in dB.
    """
    clean_files = os.listdir(clean_data_path)
    noise_files = os.listdir(noise_data_path)

    for clean_file in clean_files:
        # Load clean speech; only its STFT is needed for the mask target.
        clean_signal, _ = librosa.load(os.path.join(clean_data_path, clean_file), sr=sampling_rate)
        _, clean_stft = compute_lps(clean_signal)

        # Select a random noise file
        noise_file = np.random.choice(noise_files)
        noise_signal, _ = librosa.load(os.path.join(noise_data_path, noise_file), sr=sampling_rate)

        # Repeat the noise if it is too short, then cut it to the clean length.
        if len(noise_signal) < len(clean_signal):
            noise_signal = np.tile(noise_signal, int(np.ceil(len(clean_signal) / len(noise_signal))))
        noise_signal = noise_signal[:len(clean_signal)]

        # Strip only the final extension: splitting on the first '.' would
        # truncate names such as "a.b.wav" and make distinct inputs overwrite
        # each other's output file.
        file_stem = os.path.splitext(clean_file)[0]

        for snr in snr_levels:
            # Add noise at specified SNR
            noisy_signal = add_noise(clean_signal, noise_signal, snr)
            noisy_lps, noisy_stft = compute_lps(noisy_signal)

            # Generate IRM
            irm = generate_irm(clean_stft, noisy_stft)

            # Normalize the LPS features to zero mean and unit variance.
            lps_std = np.std(noisy_lps)
            if lps_std == 0:
                # Constant features (e.g. a fully silent mixture) have zero
                # spread; dividing by it would fill the output with NaN.
                lps_std = 1.0
            noisy_lps_normalized = (noisy_lps - np.mean(noisy_lps)) / lps_std

            # Save preprocessed data
            np.savez(f"preprocessed_snr{snr}_{file_stem}.npz",
                     noisy_lps=noisy_lps_normalized, irm=irm)
            print(f"Processed {clean_file} with SNR {snr}")

# Kick off the pipeline using the directories and SNR list configured above.
preprocess_data(
    clean_data_path=clean_data_path,
    noise_data_path=noise_data_path,
    snr_levels=snr_levels,
)
