In [None]:
import os
import random

import pandas as pd
import torch

from datasets.cv_clean_noisy_dataset import CvCleanNoisyDataset
from utils.audio_utils import spectrogram_to_waveform
from utils.file_utils import get_file_paths
from utils.plot_utils import show_spectrogram, show_waveform

In [None]:
# REQUIREMENTS (complete these before running the notebook)
# - download the source audio files into ../_audio_data
# - generate the metadata file (target/cv_metadata.tsv)
# - install ffmpeg

# --- Inputs -----------------------------------------------------------------
# Root folder of the downloaded audio, relative to the working directory.
data_dir = "../_audio_data"
# Concatenated directly onto data_dir further down, hence the leading slash.
sound_dir = "/UrbanSound8K/audio"
# Tab-separated metadata table read with pandas in a later cell.
metadata_path = "target/cv_metadata.tsv"

# Third argument handed to CvCleanNoisyDataset when the dataset is built.
files_count = 20000

# --- Output -----------------------------------------------------------------
# The serialized dataset is written here; the folder is created on demand.
output_dir = "../_datasets/"
os.makedirs(output_dir, exist_ok=True)
output_file_name = os.path.join(output_dir, "cv_clean_noisy_dataset.pt")

In [None]:
# Read the tab-separated metadata table and add an 'abs_path' column made of
# the absolute form of data_dir, a '/', and each row's 'path' value.
# NOTE(review): presumably 'path' is relative to data_dir — confirm against
# the metadata generator.
clean_metadata = pd.read_csv(metadata_path, sep='\t').assign(
    abs_path=lambda frame: os.path.abspath(data_dir) + '/' + frame['path']
)
# Plain Python list of the resolved paths, consumed by the dataset constructor.
clean_files = clean_metadata['abs_path'].to_list()

In [None]:
# sound_dir begins with '/', so the two parts are glued together as plain
# strings; os.path.join would discard data_dir for an absolute second part.
sounds_path = os.path.abspath(f"{data_dir}{sound_dir}")
# Collect the 'wav' files under that folder via the project helper.
# NOTE(review): whether the search is recursive depends on get_file_paths.
sound_files = get_file_paths([sounds_path], 'wav')

In [None]:
# Build the dataset from the clean file list and the sound file list.
# NOTE(review): files_count presumably limits how many examples are
# produced — confirm in CvCleanNoisyDataset.
dataset = CvCleanNoisyDataset(
    clean_files,
    sound_files,
    files_count,
)

In [None]:
# Persist the whole dataset object to disk. torch.save pickles the object, so
# loading it back requires the CvCleanNoisyDataset class to be importable.
torch.save(obj=dataset, f=output_file_name)

In [None]:
# Pick one example to inspect.
# NOTE(review): random.randint is inclusive on both ends and the bound 100 is
# hard-coded, so this assumes the dataset holds at least 101 items.
idx = random.randint(0, 100)

# Fetch the item ONCE so that both halves come from the same access; calling
# __getitem__ twice repeats the work and could return mismatched halves if
# item access involves any randomness.
sample = dataset[idx]
noisy_spectrogram = sample[0]  # element 0 of the item is treated as the noisy version
clean_spectrogram = sample[1]  # element 1 of the item is treated as the clean version

In [None]:
# Run the project's spectrogram_to_waveform helper on both spectrograms,
# clean first and noisy second, and unpack the two results.
clean_waveform, noisy_waveform = map(
    spectrogram_to_waveform,
    (clean_spectrogram, noisy_spectrogram),
)

In [None]:
# Plot both signals with the project's show_spectrogram helper, each under its
# own title.
# NOTE(review): the reconstructed waveforms (not the spectrograms) are passed
# here — presumably show_spectrogram computes the spectrogram itself; confirm
# in utils.plot_utils.
show_spectrogram(clean_waveform, "Clean spectrogram")
show_spectrogram(noisy_waveform, "Noisy spectrogram")

In [None]:
# Plot the reconstructed clean and noisy waveforms with the project's
# show_waveform helper, each under its own title, for visual comparison.
show_waveform(clean_waveform, "Clean waveform")
show_waveform(noisy_waveform, "Noisy waveform")