In [None]:
# load imports
import os
import librosa
import argparse
import numpy as np
from tqdm import tqdm
from scipy.io.wavfile import write

# Stabilization of tqdm: a monitor interval of 0 presumably switches off tqdm's
# background monitor thread, leaving only the progress bar itself
# (NOTE(review): confirm against the installed tqdm version).
tqdm.monitor_interval = 0

## Mute the noise and other small echoes in audio

In [None]:
def mute_noise(data_path, save_path, sampling_rate=44100, top_db=30):
    """Replace the quiet parts of every file in ``data_path`` with zeros.

    Each entry of ``data_path`` is loaded with librosa at ``sampling_rate``,
    split into non-silent intervals, and written to ``save_path`` as
    ``trimmed_<original file name>``. Samples outside the non-silent intervals
    are set to zero, so the output has as many samples as the loaded signal.

    Args:
        data_path: Directory whose files are read as audio.
        save_path: Existing directory that receives the output files.
        sampling_rate: Sampling rate used for loading and for the written file.
        top_db: Silence threshold forwarded to ``librosa.effects.split``.

    Note:
        The data is written with ``scipy.io.wavfile.write`` as 16-bit integers,
        while the output keeps the original file name and extension.
    """
    for file in tqdm(os.listdir(data_path)):
        source = os.path.join(data_path, file)

        # sub-directories cannot be decoded as audio, so skip them
        if not os.path.isfile(source):
            continue

        # load audio file; sr/top_db are passed by keyword because recent
        # librosa releases no longer accept them positionally
        audio, _ = librosa.load(source, sr=sampling_rate)

        # split the audio at desired db
        intervals = librosa.effects.split(audio, top_db=top_db)

        # create new audio with silences replaced by zeros
        muted = np.zeros(len(audio))
        for start, end in intervals:
            muted[start:end] = audio[start:end]

        # clip first: samples outside [-1, 1] would wrap around in int16
        waveform_integers = (np.clip(muted, -1.0, 1.0) * 32767).astype(np.int16)

        # save file with silences muted
        write(os.path.join(save_path, 'trimmed_' + file), sampling_rate, waveform_integers)

In [None]:
# Directory scanned for input audio, and directory that receives the output.
data_path='./audio/'
save_path='./trim_audio'

# Create the output directory if it is missing; exist_ok makes this a single
# call with no gap between checking for the directory and creating it.
os.makedirs(save_path, exist_ok=True)

mute_noise(data_path, save_path)

## Mute the noise and other small echoes in audio, and chunk it into 1-minute intervals

In [None]:
def _write_chunk(save_path, stem, extension, index, samples, sampling_rate):
    """Write ``samples`` as 16-bit integers and return the next free chunk index.

    Empty chunks are not written and do not consume an index.
    """
    if len(samples) == 0:
        return index
    # clip first: samples outside [-1, 1] would wrap around in int16
    pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
    write(os.path.join(save_path, f"{stem}_noise_muted_{index}{extension}"),
          sampling_rate, pcm)
    return index + 1


def mute_noise_chucked(data_path, save_path, sampling_rate=44100, top_db=30, trim_sec=60):
    """Mute the quiet parts of each audio file and save it in chunks.

    Every ``mp3``/``wav``/``m4a`` file in ``data_path`` is loaded with librosa
    at ``sampling_rate`` and split into non-silent intervals. The intervals are
    stitched back together with zeros in place of the gaps between them, and a
    new output file is started whenever adding the next interval or gap would
    push the current chunk past ``trim_sec`` seconds. Chunks are written to
    ``save_path`` as ``<name>_noise_muted_<n><extension>``.

    Args:
        data_path: Directory scanned for audio files.
        save_path: Existing directory that receives the chunk files.
        sampling_rate: Sampling rate used for loading and for the written files.
        top_db: Silence threshold forwarded to ``librosa.effects.split``.
        trim_sec: Target maximum chunk length in seconds.

    Note:
        * A single interval (or gap) longer than ``trim_sec`` is kept whole, so
          such a chunk exceeds the target length.
        * Audio after the last non-silent interval is not written.
        * The data is written with ``scipy.io.wavfile.write`` as 16-bit
          integers, while the output keeps the source file's extension.
    """
    max_samples = sampling_rate * trim_sec

    for file in tqdm(os.listdir(data_path)):
        # make sure only audio files are loaded; splitext keeps names that
        # contain extra dots intact
        stem, extension = os.path.splitext(file)
        if extension.lower() not in ('.mp3', '.wav', '.m4a'):
            continue

        # load audio file; sr/top_db are passed by keyword because recent
        # librosa releases no longer accept them positionally
        audio, _ = librosa.load(os.path.join(data_path, file), sr=sampling_rate)

        # split the audio at desired db
        intervals = librosa.effects.split(audio, top_db=top_db)

        # nothing audible was found, so there is nothing to write
        if len(intervals) == 0:
            continue

        n = 0

        # start with the leading silence, replaced by zeros
        chunk = np.zeros(intervals[0][0])
        for i, (start, end) in enumerate(intervals):
            if len(chunk) + (end - start) > max_samples:
                # this interval does not fit: save chunk and start a new one
                n = _write_chunk(save_path, stem, extension, n, chunk, sampling_rate)
                chunk = np.zeros(0)
            chunk = np.concatenate((chunk, audio[start:end]))

            if i < len(intervals) - 1:
                # silence between the end of this interval and the next start
                gap = intervals[i + 1][0] - end
                if len(chunk) + gap > max_samples:
                    # the gap does not fit: save chunk, carry the gap over
                    n = _write_chunk(save_path, stem, extension, n, chunk, sampling_rate)
                    chunk = np.zeros(gap)
                else:
                    chunk = np.concatenate((chunk, np.zeros(gap)))

        # save whatever is left over
        _write_chunk(save_path, stem, extension, n, chunk, sampling_rate)

In [None]:
# Directory scanned for input audio, and directory that receives the chunks.
data_path='./audio/'
save_path='./trim_audio'

# Create the output directory if it is missing; exist_ok makes this a single
# call with no gap between checking for the directory and creating it.
os.makedirs(save_path, exist_ok=True)

mute_noise_chucked(data_path, save_path)