<a href="https://colab.research.google.com/github/varshacvenkat-web/Varsha-Venkatapathy-Engineering-Portfolio-/blob/main/preporcessing_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import shutil
import numpy as np
import librosa
import scipy.signal

# Parameters
SAMPLE_RATE = 16000  # Target sample rate; default `sr` handed to librosa.load and scipy.signal.stft
FRAME_SIZE = 512  # STFT segment length in samples (passed as `nperseg`)
OVERLAP = 256  # Samples shared by consecutive STFT segments (passed as `noverlap`); 50% of FRAME_SIZE
OUTPUT_DIR = "processed_data_article_3"  # Root folder for the saved .npy features; wiped by clear_folder on every run
CLEAN_AUDIO_PATH = r"C:\Users\enhance\converted_clean_wav"  # Clean audio folder
NOISE_PARENT_DIR = r"C:\Users\enhance\Downloads\all_noises"  # Noise folder

# Reset a folder to an empty state
def clear_folder(folder_path):
    """Delete ``folder_path`` with everything in it, then recreate it empty.

    Args:
        folder_path: Directory to reset. It does not have to exist yet.

    Side effects:
        Removes the existing directory tree (if any), creates the directory,
        and prints a confirmation line.
    """
    stale_copy_present = os.path.exists(folder_path)
    if stale_copy_present:
        # Drop the previous contents wholesale rather than file by file.
        shutil.rmtree(folder_path)

    os.makedirs(folder_path, exist_ok=True)
    print(f"Cleared and recreated folder: {folder_path}")

# Function to extract audio files
def extract_files(parent_dir, extension=".wav"):
    """Recursively collect files under ``parent_dir`` that have ``extension``.

    The extension is compared case-insensitively, so ``clip.WAV`` is picked up
    alongside ``clip.wav``. The result is sorted because ``os.walk`` makes no
    promise about ordering, and a stable order keeps runs reproducible.

    Args:
        parent_dir: Root directory to search. A missing directory simply
            yields an empty list (``os.walk`` ignores the error by default).
        extension: File-name suffix to match, including the leading dot.

    Returns:
        Sorted list of matching paths, each built by joining the walked
        directory with the file name.
    """
    wanted_suffix = extension.lower()
    files = []
    for root, _dirs, filenames in os.walk(parent_dir):
        files.extend(
            os.path.join(root, name)
            for name in filenames
            if name.lower().endswith(wanted_suffix)
        )
    files.sort()
    print(f"Found {len(files)} files in {parent_dir}.")
    return files

# Function to mix clean and noisy audio at a specific SNR
def mix_clean_noisy(clean_audio, noise_audio, snr_db):
    """Add noise to a clean signal so that the mixture has the requested SNR.

    The noise is multiplied by a gain chosen such that
    ``10 * log10(clean_power / scaled_noise_power) == snr_db``, where power is
    the mean of the squared samples.

    Args:
        clean_audio: NumPy array holding the clean signal.
        noise_audio: NumPy array holding the noise; must broadcast against
            ``clean_audio`` (the caller in this file trims it to equal length).
        snr_db: Target signal-to-noise ratio in decibels.

    Returns:
        ``clean_audio + gain * noise_audio``. If the noise is completely
        silent (zero power) no gain can reach the target SNR, so a copy of the
        clean signal is returned instead of an array of NaNs.
    """
    clean_power = np.mean(clean_audio ** 2)
    noise_power = np.mean(noise_audio ** 2)

    if noise_power == 0:
        # Dividing by zero power would give an infinite gain, and inf * 0
        # turns every output sample into NaN. Silent noise adds nothing.
        return np.array(clean_audio, copy=True)

    scaling_factor = np.sqrt(clean_power / (noise_power * 10 ** (snr_db / 10)))
    return clean_audio + scaling_factor * noise_audio

# Split the STFT of a signal into its real and imaginary components
def compute_stft_features(audio, frame_size, overlap, sr):
    """Run ``scipy.signal.stft`` on ``audio`` and return its real/imag parts.

    Args:
        audio: Signal to transform.
        frame_size: Segment length, forwarded as ``nperseg``.
        overlap: Number of overlapping samples, forwarded as ``noverlap``.
        sr: Sampling rate, forwarded as ``fs``.

    Returns:
        ``(real_part, imag_part)`` of the complex STFT array. The frequency
        and time axes returned by SciPy are discarded.
    """
    _freqs, _times, spectrum = scipy.signal.stft(
        audio,
        fs=sr,
        nperseg=frame_size,
        noverlap=overlap,
    )
    return spectrum.real, spectrum.imag

# Function to load a clean/noise pair, mix it and compute STFT features
def preprocess_audio(clean_path, noise_path, snr, sr=SAMPLE_RATE):
    """Build clean and noisy STFT features for one clean/noise/SNR combination.

    Both files are loaded with ``librosa.load`` at sample rate ``sr``. The
    noise is repeated and/or truncated to the length of the clean signal,
    mixed in with ``mix_clean_noisy`` at ``snr`` dB, and both the clean and
    the noisy signal are transformed with ``compute_stft_features`` using the
    module-level ``FRAME_SIZE`` and ``OVERLAP``. Nothing is written to disk
    here; saving is left to the caller.

    Args:
        clean_path: Path of the clean audio file.
        noise_path: Path of the noise audio file.
        snr: Target signal-to-noise ratio in dB.
        sr: Sample rate passed to ``librosa.load`` and to the STFT.

    Returns:
        ``((clean_real, clean_imag), (noisy_real, noisy_imag))``.

    Raises:
        ValueError: If either file decodes to zero samples. Previously an
            empty noise file surfaced as a bare ``ZeroDivisionError`` and an
            empty clean file propagated NaNs further down the pipeline.
    """
    clean_audio, _ = librosa.load(clean_path, sr=sr)
    noise_audio, _ = librosa.load(noise_path, sr=sr)

    n_clean = len(clean_audio)
    n_noise = len(noise_audio)
    if n_clean == 0:
        raise ValueError(f"Clean audio file contains no samples: {clean_path}")
    if n_noise == 0:
        raise ValueError(f"Noise audio file contains no samples: {noise_path}")

    # Match noise length to clean audio: loop short noise, then trim.
    if n_noise < n_clean:
        repeats = -(-n_clean // n_noise)  # integer ceiling division
        noise_audio = np.tile(noise_audio, repeats)
    noise_audio = noise_audio[:n_clean]

    # Mix clean and noisy audio
    noisy_audio = mix_clean_noisy(clean_audio, noise_audio, snr)

    # Compute STFT features
    clean_real, clean_imag = compute_stft_features(clean_audio, FRAME_SIZE, OVERLAP, sr)
    noisy_real, noisy_imag = compute_stft_features(noisy_audio, FRAME_SIZE, OVERLAP, sr)

    return (clean_real, clean_imag), (noisy_real, noisy_imag)

# Clear the output folder so that features from earlier runs cannot linger
clear_folder(OUTPUT_DIR)

# Extract audio files
clean_files = extract_files(CLEAN_AUDIO_PATH)
noise_files = extract_files(NOISE_PARENT_DIR)

# Process files: every clean file is paired with every noise file at every SNR.
# NOTE(review): output names are derived from base names only, so two inputs
# that share a base name in different sub-folders would write to the same
# location — confirm the datasets have unique file names.
for clean_file in clean_files:
    # splitext strips only the final extension; str.replace(".wav", "") would
    # also delete any ".wav" occurring in the middle of the name.
    clean_filename = os.path.splitext(os.path.basename(clean_file))[0]
    clean_output_dir = os.path.join(OUTPUT_DIR, clean_filename)
    os.makedirs(clean_output_dir, exist_ok=True)

    for noise_file in noise_files:
        noise_filename = os.path.splitext(os.path.basename(noise_file))[0]
        print(f"Processing clean file: {clean_file} with noise file: {noise_file}")

        for snr in [-5, 0, 5]:
            (clean_real, clean_imag), (noisy_real, noisy_imag) = preprocess_audio(
                clean_file, noise_file, snr
            )

            # Save features; the file name encodes feature kind, SNR and noise source.
            clean_real_file = os.path.join(clean_output_dir, f"clean_real_{snr}_{noise_filename}.npy")
            clean_imag_file = os.path.join(clean_output_dir, f"clean_imag_{snr}_{noise_filename}.npy")
            noisy_real_file = os.path.join(clean_output_dir, f"noisy_real_{snr}_{noise_filename}.npy")
            noisy_imag_file = os.path.join(clean_output_dir, f"noisy_imag_{snr}_{noise_filename}.npy")

            np.save(clean_real_file, clean_real)
            np.save(clean_imag_file, clean_imag)
            np.save(noisy_real_file, noisy_real)
            np.save(noisy_imag_file, noisy_imag)

            print(f"Saved STFT features for SNR {snr} in {clean_output_dir}.")


In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt

# Path to the processed data directory; main() lists its entries and
# visualizes the features stored under the first one.
OUTPUT_DIR = "processed_data_article_3"

# List of SNR levels to visualize; each value is matched against the
# "clean_real_{snr}_" part of the saved feature file names.
SNRS = [-5, 0, 5]

def compute_average_spectrogram(clean_output_dir, snr):
    """Compute the average log-magnitude spectrogram (in dB) for one SNR level.

    For every ``clean_real_{snr}_*.npy`` file in ``clean_output_dir`` the
    matching ``clean_imag_{snr}_*.npy`` file is loaded as well, so that the
    true STFT magnitude ``sqrt(real**2 + imag**2)`` is used rather than the
    absolute value of the real part alone. Magnitudes are converted to
    decibels (``20 * log10``) before averaging.

    Args:
        clean_output_dir: Directory holding the saved ``.npy`` feature files.
        snr: SNR level embedded in the file names to select.

    Returns:
        Array with the element-wise mean of the per-file dB spectrograms, or
        ``None`` (after printing a message) when no file matches.
    """
    real_prefix = f"clean_real_{snr}_"
    imag_prefix = f"clean_imag_{snr}_"

    # Anchor the match at the start of the name and require the .npy suffix so
    # unrelated files are never handed to np.load.
    snr_files = sorted(
        f for f in os.listdir(clean_output_dir)
        if f.startswith(real_prefix) and f.endswith(".npy")
    )

    if not snr_files:
        print(f"No files found for SNR {snr} in {clean_output_dir}.")
        return None

    spectrograms = []
    for file in snr_files:
        clean_real = np.load(os.path.join(clean_output_dir, file))
        imag_path = os.path.join(clean_output_dir, imag_prefix + file[len(real_prefix):])
        if os.path.exists(imag_path):
            magnitude = np.hypot(clean_real, np.load(imag_path))
        else:
            # Best effort: keep visualizing even if the imaginary half is gone.
            print(f"Missing imaginary part for {file}; using the real part only.")
            magnitude = np.abs(clean_real)
        # The small offset avoids log10(0) on silent bins.
        spectrograms.append(20 * np.log10(magnitude + 1e-10))

    avg_spectrogram = np.mean(spectrograms, axis=0)
    return avg_spectrogram

def plot_spectrogram(spectrogram, snr, clean_file):
    """Show ``spectrogram`` as a colour image with labelled axes.

    Args:
        spectrogram: 2-D array to display; rows are drawn bottom-up
            (``origin='lower'``) and labelled as frequency bins, columns as
            time frames.
        snr: SNR level shown in the title.
        clean_file: Name of the clean file shown in the title.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    image = ax.imshow(spectrogram, aspect='auto', origin='lower', cmap='viridis')
    fig.colorbar(image, ax=ax, label='Log Magnitude (dB)')
    ax.set_title(f"Average Spectrogram for SNR {snr} ({clean_file})")
    ax.set_xlabel("Time Frames")
    ax.set_ylabel("Frequency Bins")
    plt.show()

def main():
    """Visualize average spectrograms for each SNR for one clean file.

    The clean file is the alphabetically first sub-directory of
    ``OUTPUT_DIR``. Stray files in ``OUTPUT_DIR`` are ignored, and a missing
    or empty directory produces a message instead of an exception.
    """
    if not os.path.isdir(OUTPUT_DIR):
        print(f"{OUTPUT_DIR} is not a valid directory.")
        return

    # os.listdir returns entries in arbitrary order and may include plain
    # files; keep only directories and sort so the choice is reproducible.
    clean_dirs = sorted(
        entry for entry in os.listdir(OUTPUT_DIR)
        if os.path.isdir(os.path.join(OUTPUT_DIR, entry))
    )
    if not clean_dirs:
        print(f"No processed clean-file directories found in {OUTPUT_DIR}.")
        return

    # Choose one clean file for visualization
    clean_file = clean_dirs[0]
    clean_output_dir = os.path.join(OUTPUT_DIR, clean_file)

    print(f"Processing directory: {clean_output_dir}")

    for snr in SNRS:
        avg_spectrogram = compute_average_spectrogram(clean_output_dir, snr)
        if avg_spectrogram is not None:
            plot_spectrogram(avg_spectrogram, snr, clean_file)

if __name__ == "__main__":
    main()
