In [20]:
from speechbrain.inference.separation import SepformerSeparation as separator
import torchaudio
from IPython.display import Audio

model = separator.from_hparams(source="speechbrain/sepformer-dns4-16k-enhancement", savedir='pretrained_models/sepformer-dns4-16k-enhancement')

# for custom file, change path
est_sources = model.separate_file(path='/home/aicontest/DF/data/audio/unlabeled_data/ABJGMLHQ.ogg') 


Resampling the audio from 32000 Hz to 16000 Hz


In [21]:
Audio(est_sources[:, :, 0].detach().cpu(), rate=16000)

In [22]:
Audio(est_sources[:, :, 1].detach().cpu(), rate=16000)

IndexError: index 1 is out of bounds for dimension 2 with size 1

In [27]:
import torch
import torchaudio
from speechbrain.inference.enhancement import SpectralMaskEnhancement, WaveformEnhancement

enhance_model = WaveformEnhancement.from_hparams(
    source="speechbrain/mtl-mimic-voicebank",
    savedir="pretrained_models/mtl-mimic-voicebank",
)

# Load and add fake batch dimension
noisy = enhance_model.load_audio(
    "/home/aicontest/DF/data/audio/test/TEST_00228.ogg"
).unsqueeze(0)

# Add relative length tensor
enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))

# Saving enhanced signal on disk
# Audio(enhanced.detach().cpu(), 16000)
torchaudio.save('enhanced.wav', enhanced.cpu(), 16000)


In [32]:
from pydub import AudioSegment
import numpy as np
from scipy.signal import butter, lfilter, fftconvolve
import io
from IPython.display import Audio, display

# ffmpeg 경로 설정 (필요시 수정)
AudioSegment.ffmpeg = "ffmpeg"  # 또는 ffmpeg.exe가 위치한 경로를 명시적으로 설정

# 오디오 파일 로드
def load_audio(file_path):
    audio = AudioSegment.from_file(file_path, format="ogg")
    return audio

# 노이즈 제거를 위한 저역통과 필터
def butter_lowpass(cutoff, fs, order=5):
    nyq = 0.5 * fs
    normal_cutoff = cutoff / nyq
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    return b, a

def lowpass_filter(data, cutoff, fs, order=5):
    b, a = butter_lowpass(cutoff, fs, order=order)
    y = lfilter(b, a, data)
    return y

# 노이즈 제거 알고리즘
def noise_reduction(audio):
    samples = np.array(audio.get_array_of_samples())
    fs = audio.frame_rate

    # 저역통과 필터 적용 (새 지저귀는 소리와 차량 경적 소리 필터링)
    cutoff = 1000.0  # 1kHz 이하의 주파수만 통과
    filtered_samples = lowpass_filter(samples, cutoff, fs, order=6)

    # 노이즈 감소
    reduced_noise_samples = fftconvolve(filtered_samples, np.ones(500) / 500, mode='same')

    # 다시 AudioSegment로 변환
    reduced_noise_audio = audio._spawn(reduced_noise_samples.astype(np.int16).tobytes())
    return reduced_noise_audio

# 오디오를 IPython.display.Audio로 재생
def play_audio(audio):
    # AudioSegment를 bytes로 변환
    raw_bytes = audio.raw_data
    sample_rate = audio.frame_rate
    return Audio(data=raw_bytes, rate=sample_rate)

# 예제 사용
input_path = "/home/aicontest/DF/data/audio/test/TEST_00228.ogg"

audio = load_audio(input_path)
reduced_noise_audio = noise_reduction(audio)
play_audio(reduced_noise_audio)


In [38]:
from speechbrain.inference.separation import SepformerSeparation as separator
import torchaudio

model = separator.from_hparams(source="speechbrain/sepformer-wham16k-enhancement", savedir='pretrained_models/sepformer-wham16k-enhancement')

# for custom file, change path
est_sources = model.separate_file(path='/home/aicontest/DF/data/audio/test/TEST_49836.ogg') 

Audio(est_sources[:, :, 0].detach().cpu(), rate=16000)
# torchaudio.save("enhanced_wham16k.wav", est_sources[:, :, 0].detach().cpu(), 16000)


Resampling the audio from 32000 Hz to 16000 Hz


In [40]:
"""
This class has two main functions

    - De-noising the file (pywt does it)
    - Creating a Noise Profile (parses the signal and creates a profile very memory heavy)
"""

import numpy as np
import pywt
import soundfile
from tqdm import tqdm

from lib.noiseProfiler import NoiseProfiler


def mad(arr):
    """ Median Absolute Deviation: a "Robust" version of standard deviation.
        Indices variability of the sample.
        https://en.wikipedia.org/wiki/Median_absolute_deviation 
    """
    arr = np.ma.array(arr).compressed()
    med = np.median(arr)
    return np.median(np.abs(arr - med))


class AudioDeNoise:
    """
    Class to de-noise the audio signal. The audio file is read in chunks and processed,
    cleaned and appended to the output file..

    It can de-noise multiple channels, any sized file, formats supported by soundfile

    Wavelets used ::
        Daubechies 4 : db4
        Level : decided by pyWavelets

    Attributes
    ----------
    __inputFile : str
        name of the input audio file

    Examples
    --------
    To de noise an audio file

    >>> audioDenoiser = AudioDeNoise("input.wav")
    >>> audioDenoiser.deNoise("input_denoised.wav")

    To generate the noise profile

    >>> audioDenoiser = AudioDeNoise("input.wav")
    >>> audioDenoiser.generateNoiseProfile("input_noise_profile.wav")
    """

    def __init__(self, inputFile):
        self.__inputFile = inputFile
        self.__noiseProfile = None

    def deNoise(self, outputFile):
        """
        De-noising function that reads the audio signal in chunks and processes
        and writes to the output file efficiently.

        VISU Shrink is used to generate the noise threshold

        Parameters
        ----------
        outputFile : str
            de-noised file name

        """
        info = soundfile.info(self.__inputFile)  # getting info of the audio
        rate = info.samplerate

        with soundfile.SoundFile(outputFile, "w", samplerate=rate, channels=info.channels) as of:
            for block in tqdm(soundfile.blocks(self.__inputFile, int(rate * info.duration * 0.10))):
                coefficients = pywt.wavedec(block, 'db4', mode='per', level=2)

                #  getting variance of the input signal
                sigma = mad(coefficients[- 1])

                # VISU Shrink thresholding by applying the universal threshold proposed by Donoho and Johnstone
                thresh = sigma * np.sqrt(2 * np.log(len(block)))

                # thresholding using the noise threshold generated
                coefficients[1:] = (pywt.threshold(i, value=thresh, mode='soft') for i in coefficients[1:])

                # getting the clean signal as in original form and writing to the file
                clean = pywt.waverec(coefficients, 'db4', mode='per')
                of.write(clean)

    def generateNoiseProfile(self, noiseFile):
        """
        Parses the input signal and generate the noise profile using wavelet helper
        Look into lib modules to see how the parsing is done

        NOTE: Heavy on the memory, suitable for small files.

        Parameters
        ----------
        noiseFile : str
            name for the noise signal extracted
        """
        data, rate = soundfile.read(noiseFile)
        self.__noiseProfile = NoiseProfiler(data)
        noiseSignal = self.__noiseProfile.getNoiseDataPredicted()

        soundfile.write(noiseFile, noiseSignal, rate)

    def __del__(self):
        """
        clean up
        """
        del self.__noiseProfile

ModuleNotFoundError: No module named 'lib'