In [1]:
import os
from audio_processor import AudioProcessor
from audio_visualizer import AudioVisualizer, visualize_time_frequency_3d
from audio_comparator import AudioComparator
from tqdm import tqdm

import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

In [2]:
# Input directory (read only) and the three output directories used below
sample_dir = './sample'
processed_dir = './processed_audio'
visualized_dir = './visualized_plots'
compared_dir = './compared_plots'

# Make sure every output directory exists; sample_dir is expected to be there already
for output_dir in (processed_dir, visualized_dir, compared_dir):
    os.makedirs(output_dir, exist_ok=True)

# Helper objects from the project modules, each bound to the directories it is given
processor = AudioProcessor(sample_dir, processed_dir, visualized_dir, compared_dir)
visualizer = AudioVisualizer(visualized_dir)
comparator = AudioComparator(compared_dir)

In [5]:
sample_number = 337721  # 422262

# Collect the files in sample_dir whose name starts with the sample number.
# Sorted because os.listdir returns names in arbitrary order; a stable order keeps
# the processing sequence and the resulting plots reproducible across runs.
audio_files = sorted(file for file in os.listdir(sample_dir) if file.startswith(str(sample_number)))

In [6]:
# Build (or reuse) one noisy copy of every selected file per SNR level
snr_levels = [10, 20, 30]
noisy_audios = []
print("Adding noise to audio...")
for audio_file in tqdm(audio_files):
    original_audio, sr = processor.load_audio(audio_file)
    stem = os.path.splitext(audio_file)[0]
    for snr_db in snr_levels:
        file_name = f'{stem}_snr{snr_db}.wav'
        file_path = os.path.join(processed_dir, file_name)
        if not os.path.exists(file_path):
            # Nothing cached yet: generate the noisy signal and write it out
            noisy_audio = processor.add_noise(original_audio, snr_db)
            processor.save_audio(noisy_audio, sr, file_name)
        else:
            # Reuse the file produced by an earlier run instead of regenerating it
            print(f"Skipping {file_name} as it already exists.")
            noisy_audio, _ = processor.load_audio(file_name, directory=processed_dir)  # Pass the directory argument
        # Either way the entry carries the sample rate of the original file
        noisy_audios.append((noisy_audio, sr, file_name))

Adding noise to audio...


100%|████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 41.04it/s]


In [5]:
print("Visualizing audio files...")
# List the files belonging to the sample number. The listings are sorted because
# os.listdir returns names in arbitrary order, and picking "[0]" from an unsorted
# list would make the chosen files differ from run to run.
sample_prefix = str(sample_number)
sample_files = sorted(file for file in os.listdir(sample_dir) if file.startswith(sample_prefix))
processed_files = sorted(file for file in os.listdir(processed_dir) if file.startswith(sample_prefix))

# Find the original audio file (any sample file without an '_snr' marker in its name)
original_candidates = [file for file in sample_files if '_snr' not in file]
if not original_candidates:
    raise FileNotFoundError(
        f"No original audio file for sample {sample_number} found in {sample_dir}"
    )
original_audio_file = original_candidates[0]
original_audio, sr = librosa.load(os.path.join(sample_dir, original_audio_file), sr=None)
original_stem = os.path.splitext(original_audio_file)[0]


def _load_noisy_version(snr_db):
    """Return (file_name, audio) for the processed file at the given SNR level.

    Candidates are the processed files ending in '_snr<snr_db>.wav'. The file
    named '<original stem>_snr<snr_db>.wav' is preferred so the noisy signal
    belongs to the same file as the original being plotted; otherwise the first
    candidate in sorted order is used. The audio is loaded with sr=sr, i.e. the
    sample rate returned for the original.

    Raises:
        FileNotFoundError: if no processed file exists for this SNR level.
    """
    suffix = f'_snr{snr_db}.wav'
    candidates = [file for file in processed_files if file.endswith(suffix)]
    if not candidates:
        raise FileNotFoundError(
            f"No '*{suffix}' file for sample {sample_number} found in {processed_dir}"
        )
    expected_name = f'{original_stem}{suffix}'
    file_name = expected_name if expected_name in candidates else candidates[0]
    audio, _ = librosa.load(os.path.join(processed_dir, file_name), sr=sr)
    return file_name, audio


# Find the snr30, snr20, and snr10 audio files
snr30_audio_file, snr30_audio = _load_noisy_version(30)
snr20_audio_file, snr20_audio = _load_noisy_version(20)
snr10_audio_file, snr10_audio = _load_noisy_version(10)

# Create the list of audio arrays and sample rates (clean first, then increasing noise)
audios = [original_audio, snr30_audio, snr20_audio, snr10_audio]
srs = [sr] * len(audios)


visualizer.visualize_audio(audios, srs, f'compare_SNR_{sample_number}')

Visualizing audio files...


# Issue: added noise (lower SNR) masks out high-frequency components

In [14]:
# Compare every selected original with its own noisy version, one comparison per SNR level
print("Comparing audios...")

# Load each original once up front: the clean signal does not depend on the SNR
# level, so reloading it inside the SNR loop only repeats disk reads.
originals = []
for audio_file in audio_files:
    if f'{sample_number}' in audio_file:
        original_audio, sr = processor.load_audio(audio_file)
        originals.append((original_audio, sr, audio_file))

for snr_db in tqdm(snr_levels):
    comparison_files = []
    for original_audio, sr, audio_file in originals:
        comparison_files.append((original_audio, sr, audio_file))
        # Match on the exact name produced when the noisy files were created
        # ('<stem>_snr<level>.wav'). A substring test would let a short stem match
        # the noisy files of every other file that merely shares its prefix.
        expected_name = f'{os.path.splitext(audio_file)[0]}_snr{snr_db}.wav'
        for noisy_audio, noisy_sr, noisy_file_name in noisy_audios:
            if noisy_file_name == expected_name:
                comparison_files.append((noisy_audio, noisy_sr, noisy_file_name))
    comparator.compare_audio(comparison_files, f'sample_{sample_number}_snr{snr_db}')

In [5]:
# One 10x8 inch figure holding a single 3D axes
fig, ax = plt.subplots(figsize=(10, 8), subplot_kw={'projection': '3d'})

# Draw the time-frequency analysis of one fixed sample file on that axes
# NOTE(review): whether the helper also saves the plot is not visible here - confirm
audio_file = './sample/422262.wav'
visualize_time_frequency_3d(audio_file, sr=22050, ax=ax)

In [7]:
import glob

# Every entry in the sample folder followed by every entry in the processed folder.
# Each listing is sorted because glob returns matches in arbitrary order, which
# would otherwise shuffle the figures between runs.
audio_paths = sorted(glob.glob('./sample/*')) + sorted(glob.glob('./processed_audio/*'))
for audio_file in audio_paths:
    fig = plt.figure(figsize=(10, 8))
    ax = fig.add_subplot(111, projection='3d')

    # Draw the time-frequency analysis of this file on the prepared 3D axes
    # NOTE(review): whether the helper also saves the plot is not visible here - confirm
    visualize_time_frequency_3d(audio_file, sr=22050, ax=ax)

    # Render this figure now and release it; without this, one open figure per
    # file accumulates in memory for the whole loop.
    plt.show()
    plt.close(fig)

# Visualize mel-spectrograms of the original and convolved audio

# Calculate RT60, C50