In [3]:
import os
import sys
import array
import random
import wave

import numpy as np
import matplotlib.pyplot as plt
import librosa

# Root directory holding the local repository checkouts. Set the SAK_REPOS_DIR
# environment variable to override the default instead of editing this cell
# for each machine (e.g. r'/home/takkan/repos').
repos_dir = os.environ.get('SAK_REPOS_DIR', r'/home/akikun/repos')

imgan_dir = os.path.join(repos_dir, 'Intelligibility-MetricGAN')

# Make both directories importable. Entries already present are skipped so
# that re-running this cell does not keep growing sys.path.
for _repo_path in (repos_dir, imgan_dir):
    if _repo_path not in sys.path:
        sys.path.append(_repo_path)

from sak import display as dp
from sak import signal_processing as sp
from sak import nele

In [4]:
# Target signal-to-noise ratio in dB (converted to an amplitude ratio with
# 10 ** (snr / 20) when the noise level is adjusted).
snr = -10.0

# Open the iMetricGAN sample files.
# Alternative dataset location, kept for reference:
# wav_dir = os.path.join(imgan_dir, 'JR_database')
# clean_file = os.path.join(wav_dir, 'Train/Clean/Train_001.wav')
# noise_file = os.path.join(wav_dir, 'Train/Noise/Train_001.wav')
wav_dir = os.path.join(imgan_dir, 'database')
clean_file = os.path.join(wav_dir, 'Train', 'Clean', 'Train_1.wav')
noise_file = os.path.join(wav_dir, 'Train', 'Noise', 'Train_1.wav')
# NOTE: both handles are left open on purpose; later cells read the frames
# from them and copy the wav parameters of clean_wav.
clean_wav = wave.open(clean_file, "r")
noise_wav = wave.open(noise_file, "r")

## Make a new WAV file with a given SNR by adding noise to a clean recording

In [16]:
'''
Reference: https://github.com/Sato-Kunihiko/audio-SNR
'''
def cal_amp(wf):
    """Read every frame of an open 16-bit PCM wave file as float64 samples.

    Args:
        wf: An open ``wave.Wave_read`` object.

    Returns:
        1-D ``numpy.float64`` array with the sample values, in the order
        returned by ``wf.readframes``.

    Raises:
        ValueError: If the file does not store 2-byte samples; decoding such
            data as int16 would silently produce garbage.
    """
    sample_width = wf.getsampwidth()
    if sample_width != 2:
        raise ValueError(
            "cal_amp expects 16-bit PCM audio, got sample width of "
            "{} byte(s)".format(sample_width)
        )

    # Always start from the first frame: without this, a second call on the
    # same handle (e.g. when the cell is re-run) reads nothing and returns an
    # empty array.
    wf.rewind()
    buffer = wf.readframes(wf.getnframes())
    amplitude = np.frombuffer(buffer, dtype="int16").astype(np.float64)
    return amplitude

def cal_rms(amp):
    """Return the root-mean-square of ``amp`` taken along its last axis."""
    squared = np.square(amp)
    mean_square = np.mean(squared, axis=-1)
    return np.sqrt(mean_square)

def cal_adjusted_rms(clean_rms, snr):
    """Return the noise RMS that yields the requested SNR against ``clean_rms``.

    Args:
        clean_rms: RMS level of the clean signal.
        snr: Target signal-to-noise ratio in dB (anything ``float()`` accepts).

    Returns:
        ``clean_rms / 10 ** (snr / 20)``.
    """
    amplitude_ratio = 10 ** (float(snr) / 20)
    return clean_rms / amplitude_ratio


In [19]:
# Decode both recordings into float64 sample arrays.
clean_amp, noise_amp = cal_amp(clean_wav), cal_amp(noise_wav)
print(clean_amp, noise_amp)

[0. 0. 0. ... 5. 3. 1.] [-5625. -5716. -5087. ... -5234. -4326. -3110.]


In [20]:
# RMS of the clean signal, and of a randomly positioned noise excerpt that has
# the same length as the clean signal (the noise recording must therefore be
# at least as long as the clean one).
clean_rms = cal_rms(clean_amp)

start = random.randint(0, len(noise_amp) - len(clean_amp))
split_noise_amp = noise_amp[start:start + len(clean_amp)]
noise_rms = cal_rms(split_noise_amp)

print(clean_rms, noise_rms)

655.3601012333713 3685.362157923615


In [None]:
# Scale the noise excerpt so that the clean-to-noise RMS ratio matches the
# target SNR, then add it to the clean signal.
adjusted_noise_rms = cal_adjusted_rms(clean_rms, snr)
adjusted_noise_amp = split_noise_amp * (adjusted_noise_rms / noise_rms)
mixed_amp = clean_amp + adjusted_noise_amp

# Normalise so that the later int16 conversion does not overflow.
# The scale factor is computed once, from the largest magnitude (positive or
# negative), and applied to all three signals. Recomputing it from the already
# rescaled mixture would give a factor of 1 and leave clean_amp and
# adjusted_noise_amp unscaled.
# NOTE: clean_amp is overwritten here, so re-run the cells that produce
# clean_amp before re-running this one.
peak_amp = np.abs(mixed_amp).max(axis=0)
if peak_amp > 32767:
    norm_scale = 32767 / peak_amp
    mixed_amp = mixed_amp * norm_scale
    clean_amp = clean_amp * norm_scale
    adjusted_noise_amp = adjusted_noise_amp * norm_scale


In [None]:
# Save the mixed waveform as a wav file that reuses the parameters of the
# clean recording. array.array.tostring() no longer exists on Python 3.9+, so
# the int16 samples are serialised with tobytes(); the with-block closes the
# file even if writing fails.
with wave.open('output_noisy_file.wav', 'wb') as noisy_wave:
    noisy_wave.setparams(clean_wav.getparams())
    noisy_wave.writeframes(mixed_amp.astype(np.int16).tobytes())


In [None]:
# Inspect the mixed file written above with the sak display helper.
dp.disp_wav('output_noisy_file.wav')

## Check the `sak.nele.add_noise` function

In [None]:
# Only the `nele` module is imported, so add_noise has to be called through
# it; the bare name raises NameError on a fresh kernel. The target SNR reuses
# `snr` so this file is mixed at the same level as the manual version.
nele.add_noise(clean_file, noise_file, 'output_noisy_file_2.wav', snr)

In [None]:
# Inspect the second mixed file with the sak display helper.
dp.disp_wav('output_noisy_file_2.wav')

## Implement the same function using librosa and sak

In [6]:
def add_noise2(wav_clean_path, wav_noise_path, wav_mixed_path, snr=0, sampling_frequency=44100):
    """Mix a noise recording into a clean recording at a target SNR and save it.

    The noise is rescaled so that the ratio of the mean clean RMS to the mean
    noise RMS equals ``10 ** (snr / 20)``, and is then added to the clean
    signal.

    Args:
        wav_clean_path: Path of the clean wav file, read with ``sp.load_wav``.
        wav_noise_path: Path of the noise wav file, read with ``sp.load_wav``.
        wav_mixed_path: Path the mixed signal is written to.
        snr: Target signal-to-noise ratio in dB.
        sampling_frequency: Sampling rate handed to the wav writer.

    Note:
        The two signals are added element-wise without cropping, so they
        presumably must have the same length -- TODO confirm against what
        ``sp.load_wav`` returns.
        NOTE(review): ``librosa.output.write_wav`` was removed in librosa 0.8,
        so this function requires an older librosa release.
    """
    # Load both signals.
    clean = sp.load_wav(wav_clean_path)
    noise = sp.load_wav(wav_noise_path)

    # Average RMS level of each signal.
    clean_level = np.mean(sp.calc_rms(clean))
    noise_level = np.mean(sp.calc_rms(noise))

    # rms_noise_desired = rms_clean / 10 ** (snr / 20)
    target_noise_level = clean_level / (10 ** (float(snr) / 20))

    # Bring the noise to the target level and add it to the clean signal.
    mixed = clean + noise * target_noise_level / noise_level

    # Write the mixture.
    librosa.output.write_wav(wav_mixed_path, mixed, sampling_frequency)

In [9]:
# Mix the clean and noise recordings at 0 dB SNR. This calls add_noise2 from
# the imported `nele` module, not the add_noise2 defined in this notebook.
wav_clean_path, wav_noise_path = clean_file, noise_file
wav_mixed_path = 'mixed.wav'
sampling_frequency = 44100

nele.add_noise2(wav_clean_path, wav_noise_path, wav_mixed_path,
                snr=0, sampling_frequency=sampling_frequency)

In [10]:
# Show the clean input, the noise input and the resulting mixture in turn for
# comparison.
dp.disp_wav(clean_file)
dp.disp_wav(noise_file)
dp.disp_wav(wav_mixed_path)